Esempio n. 1
0
def preprocess(dbPath):
    '''
    Poll the database at dbPath for records that need hashing and stamp
    each one with hash start/end times and a hash placeholder.

    Runs forever (5 second poll interval); returns early only when the
    database file does not exist.
    '''
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('PreProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        # Heartbeat roughly every 50 seconds so the log shows liveness.
        if loopcount % 10 == 0:
            logging.debug('PreProcess is alive')
        loopcount += 1

        data = datastore.recordsForHashing()
        for record in data:
            logging.debug(record)

            key_id = record.id
            filePath = record.fileName

            if not os.path.exists(filePath):
                logging.debug(
                    'PreProcess: Will update record status as the file no longer exists'
                )
                datastore.updateRecordAsMissingWithID(key_id)
                continue

            try:
                logging.debug('PreProcess: locking file to calculate hash...')
                ##UPDATE HASH OPERATION START HERE
                startTime = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
                datastore.updateRecordWithHashStart(startTime, key_id)

                # 'with' guarantees the handle (and the shared lock held
                # on it) is released even if hashing raises; the original
                # close() was skipped on the exception path.
                with open(filePath, 'rb') as fileToHash:
                    portalocker.lock(fileToHash, portalocker.LOCK_SH)
                    hashString = "NO_OP"  #hashForFile(fileToHash)
                    endTime = datetime.datetime.now().strftime(
                        "%Y-%m-%d %H:%M:%S")

                logging.debug('PreProcess: unlocking file...')
                logging.debug(
                    'PreProcess: Will update record status with Hash string and times'
                )

                datastore.updateRecordWithHashForStartTimeAndEndTime(
                    hashString, startTime, endTime, key_id)

            except Exception as e:
                # str(e) works for every exception type; e.message is not
                # guaranteed to exist.
                info = ('PreProcess: There was an error when calculating the'
                        ' hash for file: ' + os.path.basename(filePath) +
                        ' ' + str(e))
                sendFailureEmail(info)
                logging.error(str(e))
Esempio n. 2
0
def preprocess(dbPath):
	'''
	Poll the database at dbPath for records that need hashing and stamp
	each one with hash start/end times and a hash placeholder.

	Runs forever (5 second poll interval); returns early only when the
	database file does not exist.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('PreProcess: can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		# Heartbeat roughly every 50 seconds so the log shows liveness.
		if loopcount % 10 == 0:
			logging.debug('PreProcess is alive')
		loopcount += 1

		data = datastore.recordsForHashing()
		for record in data:
			logging.debug(record)

			key_id = record.id
			filePath = record.fileName

			if not os.path.exists(filePath):
				logging.debug('PreProcess: Will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
				continue

			try:
				logging.debug('PreProcess: locking file to calculate hash...')
				##UPDATE HASH OPERATION START HERE
				startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				datastore.updateRecordWithHashStart(startTime, key_id)

				# 'with' guarantees the handle (and the shared lock held on
				# it) is released even if hashing raises; the original
				# close() was skipped on the exception path.
				with open(filePath, 'rb') as fileToHash:
					portalocker.lock(fileToHash, portalocker.LOCK_SH)
					hashString = "NO_OP"#hashForFile(fileToHash) 
					endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				logging.debug('PreProcess: unlocking file...')
				logging.debug('PreProcess: Will update record status with Hash string and times')

				datastore.updateRecordWithHashForStartTimeAndEndTime(hashString, startTime, endTime, key_id)

			except Exception as e:
				# str(e) works for every exception type; e.message is not
				# guaranteed to exist.
				info = 'PreProcess: There was an error when calculating the hash for file: ' + os.path.basename(filePath) + ' ' + str(e)
				sendFailureEmail(info)
				logging.error(str(e))
Esempio n. 3
0
def jsonStringForAMNumber(amNumber):
	'''
	Return the Archive Manager request JSON (as text) for amNumber, or
	None when the request fails or returns a non-200 status.

	The magic number 'X9X9X9X9X9' short-circuits to canned test data.
	'''
	if amNumber == 'X9X9X9X9X9':
		return testDataForX9X9X9X9X9()
	# Build the URL before the try so the failure email below can always
	# interpolate it (originally it was assigned inside the try and could
	# raise NameError in the except handler).
	address = 'http://archivemanager.mts.inbcu.com/ArchiveManager/api/requests?folder=' + str(amNumber)
	try:
		r = requests.get(address)
		if r.status_code == 200:
			return r.text
	except Exception:
		info = 'The server is unable to retrieve data from Archive Manager at %s for the Archive Manager entry %s. This likely means that the number is incorrect or the connection to Archive Manager is faulty.'
		info = info % (address, amNumber)
		sendFailureEmail(info)
	return None
Esempio n. 4
0
def jsonStringForAMNumber(amNumber):
    '''
    Return the Archive Manager request JSON (as text) for amNumber, or
    None when the request fails or returns a non-200 status.

    The magic number 'X9X9X9X9X9' short-circuits to canned test data.
    '''
    if amNumber == 'X9X9X9X9X9':
        return testDataForX9X9X9X9X9()
    # Build the URL before the try so the failure email below can always
    # interpolate it (originally it was assigned inside the try and could
    # raise NameError in the except handler).
    address = 'http://archivemanager.mts.inbcu.com/ArchiveManager/api/requests?folder=' + str(
        amNumber)
    try:
        r = requests.get(address)
        if r.status_code == 200:
            return r.text
    except Exception:
        info = 'The server is unable to retrieve data from Archive Manager at %s for the Archive Manager entry %s. This likely means that the number is incorrect or the connection to Archive Manager is faulty.'
        info = info % (address, amNumber)
        sendFailureEmail(info)
    return None
Esempio n. 5
0
	def metaDataStringForNumber(self, numberString):
		'''
		Return the Daisy asset metadata JSON (as text) for numberString,
		or the string '[]' when the lookup fails.

		In dev mode the HTTP request is short-circuited with mock data.
		'''
		if self.isDevMode == True:
			return self.mockDataAsString(numberString)
		# Build the URL before the try so the error path can always use it.
		# address = 'http://daisy.inbcu.com/daisy/asset/metadata/' + str(numberString)
		address = "http://qa.daisy.nbcuni.ge.com/daisy/asset/metadata/" + str(numberString)
		try:
			r = requests.get(address)
			if r.status_code == 200:
				return r.text
		except Exception as e:
			# str(e) works for every exception type; e.message is not
			# guaranteed to exist. (print(x) is valid Python 2 and 3.)
			print(str(e))
			errorString = 'Unable to retrieve daisy asset metadata for ' + str(numberString) + "."
			errorString = errorString + " " + "Additional information: " + str(e)
			sendFailureEmail(errorString)
		return '[]'
Esempio n. 6
0
 def metaDataStringForNumber(self, numberString):
     '''
     Return the Daisy asset metadata JSON (as text) for numberString,
     or the string '[]' when the lookup fails.

     In dev mode the HTTP request is short-circuited with mock data.
     '''
     if self.isDevMode == True:
         return self.mockDataAsString(numberString)
     # Build the URL before the try so the error path can always use it.
     # address = 'http://daisy.inbcu.com/daisy/asset/metadata/' + str(numberString)
     address = "http://qa.daisy.nbcuni.ge.com/daisy/asset/metadata/" + str(
         numberString)
     try:
         r = requests.get(address)
         if r.status_code == 200:
             return r.text
     except Exception as e:
         # str(e) works for every exception type; e.message is not
         # guaranteed to exist. (print(x) is valid Python 2 and 3.)
         print(str(e))
         errorString = 'Unable to retrieve daisy asset metadata for ' + str(
             numberString) + "."
         errorString = errorString + " " + "Additional information: " + str(e)
         sendFailureEmail(errorString)
     return '[]'
Esempio n. 7
0
def processRecordsReadyToBeHashed(data, datastore):
	"""
	Verify, checksum, and route files whose crypto operation finished.

	For each record in data: re-hash the produced file (Decrypt records
	only), compare the hash against the checksum Daisy holds for the
	asset, then move the file to the outbox or error box accordingly.
	Files belonging to a batch (Archive Manager job) are gathered into a
	per-job folder and only dispatched once every file of the job is in
	place; single-mode files are moved immediately.

	NOTE(review): this is Python 2 code (print statements, e.message);
	several comments below depend on Python 2 semantics.
	"""

	logging = DefaultLogger()

	for record in data:
		logging.debug(record)

		key_id 				= record.id
		sourceFilePath 		= record.fileName
		filePath 			= record.operationFileName
		recordOperationType = record.operationType
		pathStructureName 	= record.pathStructureName
		isBatch				= record.isBatch
		batchName			= record.batchName

		# Resolve the destination folders configured for this record.
		currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName)
		finalPath = currentPathStructure['outBox']
		finalOriginalDestinationPath = currentPathStructure['originalBox']
		errorPath = currentPathStructure['errorBox']

		if not os.path.exists(filePath):
			# if the processed file doesn't exist, then move update the record and move to the error box
			# ADD LOGIC FOR BATCH PROCESSING
			logging.debug('PostProcess: Will update record status as the encrypted file does not exist')
			newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
			datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
			continue

		#CALCULATE HASH
		startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		datastore.updateRecordWithReHashStart(startTime, key_id)
		hashString = 'NO_HASH'

		#only hash files being decrypyted
		if recordOperationType == 'Decrypt':
			try:
				fileToHash = open(filePath, 'rb')
				logging.debug('PostProcess: locked file to calculate hash...')
				portalocker.lock(fileToHash, portalocker.LOCK_SH)
				hashString = hashForFile(fileToHash)
				logging.debug('PostProcess Hasher: unlocking file...')
				fileToHash.close()
			except Exception as e:
				# NOTE(review): the open handle leaks if hashForFile
				# raises; the sentinel value routes the record into the
				# checksum-failure path below.
				hashString = 'HASH_GEN_ERROR'
		else:
			hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

		endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

		#ONLY DECRYPTED FILES' HASH IS CHECKED
		didChecksumFail = False
		checkSumErrorString = None

		if recordOperationType == 'Decrypt':
			# Batch files carry a "<batchName>_" prefix that must be
			# stripped before looking the asset up in Daisy.
			fileBaseName = os.path.basename(filePath)
			if isBatch:
				fileBaseName = os.path.basename(filePath).split((batchName + "_"))[1]

			daisyNumber = getDaisyNumber(fileBaseName)

			try:
				# Each failure raises after recording the status code to
				# store on the record; the handler below converts the
				# exception into didChecksumFail/checkSumErrorString.
				errorRecordStatus = 0
				if daisyNumber == None:
					errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
					errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
					logging.debug(errorString)
					errorRecordStatus = datastore.daisyEntryNotFoundStatusCode()
					raise Exception(errorString)

				originalChecksum = DaisyMetadataLookup(daisyNumber).checksumForFile(fileBaseName)

				if originalChecksum == None:
					errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
					errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
					logging.debug(errorString)
					errorRecordStatus = datastore.checksumLookupFailedStatusCode()
					raise Exception(errorString)

				if originalChecksum.upper() != hashString.upper():
					errorString = 'Checksums do not match for file ' + filePath + '\n'
					errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
					errorString = errorString + " but found this checksum instead:" + hashString
					logging.debug(errorString)
					errorRecordStatus = datastore.checksumComparisonFailedStatusCode()
					raise Exception(errorString)

			except Exception as checksumException:
				#we have an error, so we must create a new folder in the error path
				#if the file is non-batch, then 
				logging.debug('PostProcess: The checksum failed. Please see the appropriate Error Box')
				checkSumErrorString = 'There was a checksum error.' + '\n' + checksumException.message
				didChecksumFail = True

			#If the file failed a checksum and is not a bacth file, then move it to the error box
			if didChecksumFail == True and isBatch == False:
				# NOTE(review): the references to checksumException and
				# errorRecordStatus below rely on Python 2 keeping the
				# except-clause variable alive after the handler exits.
				errorPathInformation = ''
				try:
					logging.debug('PostProcess: creating a Decrypted Checksum folder')
					errorDestination = createSafeFolderInDestinationFolder(errorPath, 'DECRYPTED_CHECKSUM_ERROR')
					try:
						info = 'Moving the file that errored into the folder at ' + errorDestination
						logging.debug(info)
						shutil.move(filePath, os.path.join(errorDestination,fileBaseName))
						errorPathInformation = info
					except Exception as e:
						info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
						logging.debug(info)
				except Exception as e:
					info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
					logging.debug(info)

				#THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
				try:
					info = 'Moving  the source file into the error box at ' + errorPath
					logging.debug(info)
					newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, errorPath)
					errorPathInformation = errorPathInformation + '\n' + info
				except Exception as e:
					info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + sourceFilePath
					logging.debug(info)

				datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
				info = checksumException.message + '\n' + errorPathInformation
				logging.debug(info)
				sendFailureEmail(info)
				continue

		#Lets now address the batch decrypted files

		newPath = filePath
		success = False

		if isBatch == True and recordOperationType == 'Decrypt':
			#create the destination folder for the Archive Manager Job
			amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
			if amRecord is None:
				#This should not happen as we don't even allow for the logic to proceed to this point without
				#a valid Archive Manager Record
				info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
				info = info + 'This error should not happen. Please check ' + os.path.dirname(filePath) + '\n'
				info = info + 'The files will need to be manually removed from the Decryption Queue.'
				logging.debug(info)
				sendFailureEmail(info)
				continue

			if didChecksumFail == True:
				#add checksum error string to archive manager job
				amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
				datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

			#create the new folder in interim where we will push all of the batch files
			destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
			if not os.path.exists(destinationAMFolder):
				try:
					os.mkdir(destinationAMFolder)
				except OSError as e:
					# mkdir may race with another worker creating the same
					# batch folder; losing that race is harmless.
					pass

			#get the file name, strip leading archive manager number
			originalFileName = os.path.basename(filePath)
			if isBatch == True:
				originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]

			#this is where we will move the interim file, a new folder with its original name
			proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

			#at this point the file should be in the a folder named after the batch
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
			except Exception as e:
				info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
				sendFailureEmail(info)
				continue

			if os.path.basename(originalFileName) != os.path.basename(newPath):
				#there was a collision, there really is no reason why this should happen, but lets account for it
				errorString = 'For some reason, there already exists a file in %s labeled %s' % (destinationAMFolder, originalFileName) + '\n'
				amRecord.errorString = amRecord.errorString + '\n' + errorString
				datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

			success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, hashString, startTime, endTime, key_id)
			currentFiles = visibleFilesInFolder(destinationAMFolder)

			amPath = amRecord.amPath
			filesInJob = amRecord.allFilesInRecord()

			#are we finished, are all the files in place or the batch job?
			try:
				areAllFilesInPlace = True
				for nFile in filesInJob:
					if nFile not in currentFiles:
						areAllFilesInPlace = False

				if areAllFilesInPlace == False:
					# The job is still accumulating files; later records
					# will re-run this check.
					continue

				logging.debug('All files are in place')
				try:
					#remove old source folder
					logging.debug('PostProcess: removing original inbox')
					shutil.rmtree(amPath)
				except OSError as e:
					info = "PostProcess: " + e.message
					logging.debug(info)
					info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (amPath)
					sendFailureEmail(info)

				#refresh the record
				amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
				if amRecord is None:
					#This should not happen as we don't even allow for the logic to proceed to this point without
					#a valid Archive Manager Record
					info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
					info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
					info = info + 'The files will need to be manually removed from the Decryption Queue.'
					logging.debug(info)
					sendFailureEmail(info)
					continue

				#if there is an error, the redirect to the error box
				if amRecord.errorString != '':
					finalPath = errorPath
					#move the error files into a folder that indicates they are errors, it will live in the error box
					try:
						if datastore.updateArchiveManagerJobAsErrored(amRecord) == True:
							logging.debug('Job has finished, but there were some errors')
							logging.debug('PostProcess: will send email')
							info = 'Job %s has some errors! Please see the ErrorBox at %s' % (batchName, errorPath)
							info = info + '\n' + amRecord.errorString
							sendFailureEmail(info)
						else:
							logging.debug('PostProcess: Error saving Job')

						# Rename the batch folder so the error box makes the
						# failure obvious, then move it there.
						errDirname = os.path.dirname(destinationAMFolder)
						errBasename = os.path.basename(destinationAMFolder) + '_DECRYPTED_ERROR'
						os.rename(destinationAMFolder, os.path.join(errDirname, errBasename))
						destinationAMFolder = os.path.join(errDirname, errBasename)
						# shutil.move(destinationAMFolder, errorPath)
						pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder, errorPath)

					except Exception as e:
						info = 'An error occurred when moving the errored files to %s.' % (errorPath,)
						logging.debug(info)
						sendFailureEmail(info)
				else:
					#No errors, move the files to the appropriate place
					print "No Errors finalPath", finalPath
					try:
						logging.debug('PostProcess: moving archive mananger folder to final destination')
						if os.path.exists(os.path.join(finalPath, os.path.basename(destinationAMFolder))):
							logging.debug('PostProcess: collision moving to duplicate box')
							altPath = pathAfterSafelyMovingFileToDestinationFolder(destinationAMFolder, finalPath)
						else:
							shutil.move(destinationAMFolder, finalPath)

						if datastore.updateArchiveManagerJobAsReadyToComplete(amRecord) == True:
							logging.debug('PostProcess: job is ready to complete')
							logging.debug('PostProcess: moving files and sending email')
							info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (batchName)
							sendSuccessEmail(info)
						else:
							logging.debug('PostProcess: Error saving Job')

					except OSError as e:
						#again, I am accounting for this error, I just don't know why I would ever encounter a situation like this
						info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (destinationAMFolder)
						info = info + " " + e.message
						sendFailureEmail(info)
						logging.debug(info)
						continue

			except Exception as e:
				info = 'An error occurred. Please see check the Decryption Queue for job %s. See Error: %s' % (batchName, e.message)
				logging.debug(info)
				sendFailureEmail(info)

		else:
			#LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION 
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, finalPath)

			if not os.path.exists(newPath):
				logging.debug('PostProcess: Error moving file')
				continue

			logging.debug('PostProcess: Will update record status with Hash string and times')

			success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(newPath, 
				hashString, startTime, endTime, key_id)

			if success == True:
				# move original file to original box
				try:
					newPath = pathAfterSafelyMovingFileToDestinationFolder(sourceFilePath, finalOriginalDestinationPath)
				except Exception as e:
					logging.debug('There was an error moving the file into place')
					info = 'There was an error moving file %s into the outbox at %s' % (sourceFilePath, finalOriginalDestinationPath)
					sendFailureEmail(info)

				if configurationOptions().shouldDeleteOriginal == True:
					try:
						os.remove(newPath)
					except OSError as e:
						logging.debug('PostProcess: Unable to delete the file', newPath)
Esempio n. 8
0
def postprocess(dbPath):
	'''
	This is the post process module
	'''
	
	if not os.path.exists(dbPath):
		logging.debug('PreProcess: can\'t find database at path')
		return

	datastore = DataStore(dbPath)

	loopcount = 0	

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('PostProcess is alive')
		loopcount += 1

		#calculate checksums on decrypted files
		data = datastore.recordsForReHashing()

		processRecordsReadyToBeHashed(data, datastore)
		
		#delete associated files as the job was successful
		amRecords = datastore.archiveManagerJobsReadyToComplete()
		for amRecord in amRecords:
			dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
			for record in dataStoreRecords:
				recordPath = record.fileName
				if configurationOptions().shouldDeleteOriginal == True:
					try:
						os.remove(recordPath)
					except OSError as e:
						info = 'PostProcess: Unable to delete the file %s' % (recordPath,)
						logging.debug(info)
			datastore.updateArchiveManagerJobAsComplete(amRecord)


		#move the associated files to the error box as the job had problems
		amRecords = datastore.archiveManagerJobsThatErrored()
		for amRecord in amRecords:
			logging.debug('performing clean up with ' + amRecord.amNumber)

			batchName 			= amRecord.amNumber
			destinationAMFolder = ''
			errorPath 			= ''

			dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
			for record in dataStoreRecords:
				pathStructureName 		= record.pathStructureName
				filePath 				= record.fileName
				currentPathStructure 	= configurationOptions().pathStructureWithName(pathStructureName)
				errorPath 				= currentPathStructure['errorBox']
				print filePath

				destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
				print 'This is where the working files will go.', destinationAMFolder

				if not os.path.exists(destinationAMFolder):
					try:
						os.mkdir(destinationAMFolder)
					except OSError as e:
						pass

				originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]
				proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

				try:
					# newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
					print filePath, proposedAMPath
					shutil.move(filePath, proposedAMPath)
				except Exception as e:
					info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
					sendFailureEmail(info)
					continue

				currentFiles = os.listdir(destinationAMFolder)
				filesInJob = amRecord.allFilesInRecord()

				areAllFilesInPlace = True			
				for nFile in filesInJob:
					if nFile not in currentFiles:
						areAllFilesInPlace = False
				if areAllFilesInPlace == True:
					print "moving files to the error path"
					try:
						pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder,errorPath)
					except Exception as e:
						info = 'PostProcess: Unable to move the file %s' % (filePath,)
						logging.debug(info)
						info = 'There was an error moving the folder %s into the outbox at %s' % (destinationAMFolder, errorPath)
						info = info + '\n' + 'This will need to be addressed manually'
						sendFailureEmail(info)
						continue

			datastore.updateArchiveManagerJobAsComplete(amRecord)
Esempio n. 9
0
def checkSingleFiles(dbPath):
	"""
	One verification pass over records awaiting acquisition.

	For each record: wait until the file's size has been stable past the
	verification window, confirm no other process holds it (exclusive
	lock probe), divert duplicates, and either associate batch-decrypt
	files with their Archive Manager job or move single files from the
	inbox to the working box and mark them static.
	"""
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Acquire File: can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	data = datastore.recordsForVerifying()

	for record in data:

		key_id 				= record.id
		filePath 			= record.fileName
		recordSize 			= int(record.fileSize)
		dateModifiedString 	= record.dateModified
		pathStructureName 	= record.pathStructureName
		operationType		= record.operationType
		isBatch				= record.isBatch
		batchName			= record.batchName

		# NOTE(review): timeDifference.seconds is only the seconds
		# component (wraps every 24h), not total_seconds() — confirm the
		# intent for records older than a day.
		dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
		timeDifference = datetime.datetime.now() - dateLastModified

		#This can change with an if/else should I decide I want to put temp files to be decrypted in another place
		# NOTE(review): sourcePath is computed but never used below.
		sourcePath = configurationOptions().pathStructureWithName(pathStructureName)['inBox']
		workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

		if timeDifference.seconds < verificationWaitTime:
			continue

		lastSize = recordSize
		currentSize = 0

		if not os.path.exists(filePath):
			logging.debug('Acquire File: Will update record status as the file no longer exists')
			datastore.updateRecordAsMissingWithID(key_id)
			continue

		currentSize = os.path.getsize(filePath)

		# Size changed since last pass: record the new size/timestamp and
		# re-check on a later pass (the copy is still in progress).
		if lastSize != currentSize:
			logging.debug(record)
			logging.debug('Acquire File: attempting db modify as file size has changed...')
			datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
			continue

		if currentSize == 0:
			continue
			# if the current size is zero, then continue until it isn't or never will be
			# its likely the file has been queued to copy but no data has been moved yet (actual OSX case) 

		logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')

		# Probe with an exclusive lock: if it fails, the producer still
		# owns the file; skip it until the next pass.
		try:
			fileToCheck = open(filePath, 'rb')
			portalocker.lock(fileToCheck, portalocker.LOCK_EX)
			fileToCheck.close()
			logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
		except Exception as e:
			logging.debug('Acquire File: unable to lock file as it is likely in use')
			continue

		# Duplicate of a file already tracked elsewhere in the path
		# structure: divert to the duplicate box instead of processing.
		if datastore.doesTheFilePathExistElseWhereInThePathStructure(filePath, operationType, pathStructureName) == True:
			duplicatePath = configurationOptions().pathStructureWithName(pathStructureName)['duplicateBox']
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, duplicatePath)
			datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
			continue

		newPath = filePath
		#Only update 
		if isBatch == 1 and operationType == 'Decrypt':
			# Batch decrypt: link the record to its Archive Manager job,
			# preferring the UUID marker file left in the folder.
			amRecord = None
			uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

			if uuidString == None:
				#if I can't resolve the UUID, then resovle it though an AM Record
				#Does file's Archive Manager have data associated with it
				amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
				if amRecord == None:
					info = "Acquire File: Archive Manager data doesn't exist for " + filePath
					info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
					logging.debug(info)
					datastore.updateRecordStatusWithID(datastore.noArchiveManagerDataExistsForRecord(), key_id)
					continue
			else:
				logging.debug('Updating record %s with UUID %s' % (filePath, uuidString))
				amRecord = datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
				datastore.updateRecordAWithBatchUUIDReference(uuidString, key_id)
		else:
			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception as e:
				logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Esempio n. 10
0
def checkArchiveManagerJobs(dbPath):
	"""
	One pass over Archive Manager jobs that are ready to start.

	A job is only dispatched when every file it lists is present in its
	folder and every file can be exclusively locked. Each file is then
	renamed with a "<batchName>_" prefix, moved from the inbox to the
	working box, and its record marked static.
	"""
	logging = DefaultLogger()

	datastore = DataStore(dbPath)
	amRecords = datastore.archiveManagerJobsReadyToStart()

	for amRecord in amRecords:
		areAllFilesAvailableAndReady = True

		recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
		filesInAMRecord = [x.fileName for x in recordsInAMRecord]
		filesInCurrentFolder = []

		try:
			filesInCurrentFolder = os.listdir(amRecord.amPath)
		except Exception as e:
			# amPath may not exist yet; an empty listing just delays the job.
			pass

		# NOTE(review): filesInCurrentFolder holds bare directory entries
		# while filesInAMRecord holds record.fileName values; if fileName
		# is a full path (the basename() below suggests it is), this check
		# flags every file as unknown — confirm against DataStore.
		isThereAnUnknownFilePresent = False
		for currentFile in filesInCurrentFolder:
			if currentFile not in filesInAMRecord:
				isThereAnUnknownFilePresent = True

		if isThereAnUnknownFilePresent == True:
			logging.debug('Unknown files are present')
			pass
			#report error

		# Every file the job lists must already be staged in the folder.
		for currentFile in filesInAMRecord:
			logging.debug('%s' % (currentFile))
			lastComponent = os.path.basename(currentFile)
			if lastComponent not in filesInCurrentFolder:
				logging.debug('The following file is not yet available: %s' % (lastComponent))
				areAllFilesAvailableAndReady = False

		if areAllFilesAvailableAndReady == False:
			logging.debug('Not all of the files are staged yet')
			continue

		canLockAllRecords = True

		data = datastore.recordsForUUID(amRecord.uuid)

		# Probe every file with an exclusive lock; any failure defers the
		# whole job to a later pass. NOTE(review): the lock is released
		# immediately, so a producer could reacquire a file before the
		# rename below — confirm this race is acceptable.
		for record in data:

			filePath = record.fileName

			try:
				fileToCheck = open(filePath, 'rb')
				portalocker.lock(fileToCheck, portalocker.LOCK_EX)
				fileToCheck.close()
				logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
			except Exception as e:
				logging.debug('Acquire File: unable to lock file as it is likely in use')
				canLockAllRecords = False

		if canLockAllRecords == False:
			logging.debug('Can not lock all of the records yet')
			continue

		for record in data:

			key_id 				= record.id
			filePath 			= record.fileName
			recordSize 			= int(record.fileSize)
			dateModifiedString 	= record.dateModified
			pathStructureName 	= record.pathStructureName
			operationType		= record.operationType
			isBatch				= record.isBatch
			batchName			= record.batchName
			pathStructureName 	= record.pathStructureName

			newPath = filePath
			workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

			proposedBatchName = batchName + "_" + os.path.basename(filePath)
			proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName) 

			#we prepend the job name to the file here as it belongs to a batch
			try:
				if os.path.exists(proposedPath):
					raise Exception('file already exists')
				os.rename(filePath, proposedPath)
				filePath = proposedPath
			except Exception as e:
				#this is an unlikely occurrence
				# NOTE(review): e.message is Python 2 only and may be empty
				# for the exception raised above.
				info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + e.message
				logging.debug(info)
				sendFailureEmail(info)
				continue

			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception as e:
				logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Esempio n. 11
0
def checkSingleFiles(dbPath):
    """Verify queued single-file records and promote them into processing.

    For each record returned by ``datastore.recordsForVerifying()`` this:

    1. skips records modified more recently than ``verificationWaitTime``
       seconds ago (the copy may still be in progress),
    2. marks records whose file has vanished as missing,
    3. waits until the on-disk size matches the recorded size (updating
       the record whenever the size changes),
    4. confirms exclusive access by briefly taking a file lock,
    5. shunts duplicates to the duplicate box, and finally
    6. either links a Decrypt batch member to its Archive Manager job
       (via the UUID marker file or the AM jobs table) or moves a
       stand-alone file from the inbox to the working box and marks the
       record static.

    dbPath -- path to the DataStore database; the function returns
    immediately if it does not exist.
    """
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('Acquire File: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    data = datastore.recordsForVerifying()

    for record in data:

        key_id = record.id
        filePath = record.fileName
        recordSize = int(record.fileSize)
        dateModifiedString = record.dateModified
        pathStructureName = record.pathStructureName
        operationType = record.operationType
        isBatch = record.isBatch
        batchName = record.batchName

        dateLastModified = datetime.datetime.strptime(dateModifiedString,
                                                      '%Y-%m-%d %H:%M:%S')
        timeDifference = datetime.datetime.now() - dateLastModified

        workingPath = configurationOptions().pathStructureWithName(
            pathStructureName)['workingBox']

        # BUGFIX: the original compared timeDifference.seconds, but
        # timedelta.seconds excludes the .days component and wraps at 24
        # hours, so a file untouched for just over a day could look "too
        # recent" forever.  total_seconds() is the real elapsed time.
        if timeDifference.total_seconds() < verificationWaitTime:
            continue

        if not os.path.exists(filePath):
            logging.debug(
                'Acquire File: Will update record status as the file no longer exists'
            )
            datastore.updateRecordAsMissingWithID(key_id)
            continue

        lastSize = recordSize
        currentSize = os.path.getsize(filePath)

        if lastSize != currentSize:
            # The file is still growing: refresh the recorded size and
            # timestamp and re-check on a later pass.
            logging.debug(record)
            logging.debug(
                'Acquire File: attempting db modify as file size has changed...'
            )
            datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(
                currentSize,
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
            continue

        if currentSize == 0:
            # A zero-byte file has likely been queued to copy but no data
            # has arrived yet (observed on OSX); keep polling until it grows.
            continue

        logging.debug(
            'Acquire File: attempting to lock the file to see if I own the file yet...'
        )

        # Briefly take an exclusive lock to prove no other process (e.g.
        # the copier) still owns the file.  BUGFIX: close the handle in a
        # finally block -- the original leaked the descriptor whenever
        # portalocker.lock() raised.
        fileToCheck = None
        try:
            fileToCheck = open(filePath, 'rb')
            portalocker.lock(fileToCheck, portalocker.LOCK_EX)
            logging.debug(
                'Acquire File: proceeding to update the file status knowing that no one else is using it...'
            )
        except Exception:
            logging.debug(
                'Acquire File: unable to lock file as it is likely in use')
            continue
        finally:
            if fileToCheck is not None:
                fileToCheck.close()

        if datastore.doesTheFilePathExistElseWhereInThePathStructure(
                filePath, operationType, pathStructureName):
            duplicatePath = configurationOptions().pathStructureWithName(
                pathStructureName)['duplicateBox']
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, duplicatePath)
            datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
            continue

        newPath = filePath

        if isBatch == 1 and operationType == 'Decrypt':
            # Batch members are not moved here; they are only linked to
            # their Archive Manager job and picked up by the batch stage.
            uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

            if uuidString is None:
                # No UUID marker file: fall back to resolving the batch
                # via its Archive Manager jobs-table record.
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    info = "Acquire File: Archive Manager data doesn't exist for " + filePath
                    info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
                    logging.debug(info)
                    datastore.updateRecordStatusWithID(
                        datastore.noArchiveManagerDataExistsForRecord(),
                        key_id)
                    continue
            else:
                logging.debug('Updating record %s with UUID %s' %
                              (filePath, uuidString))
                datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
                datastore.updateRecordAWithBatchUUIDReference(
                    uuidString, key_id)
        else:
            # Stand-alone file: move it from the inbox to the working box.
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Esempio n. 12
0
def checkArchiveManagerJobs(dbPath):
    """Promote Archive Manager batch jobs whose files are fully staged.

    For each job reported ready by the datastore this verifies that every
    file listed in the job is present in the job folder and can be locked
    exclusively (i.e. nothing is still writing to it).  Once every file is
    ready, each one is renamed with a ``<batchName>_`` prefix, moved from
    the inbox into the working box, and its record is marked static.

    dbPath -- path to the DataStore database.
    """
    logging = DefaultLogger()

    datastore = DataStore(dbPath)
    amRecords = datastore.archiveManagerJobsReadyToStart()

    for amRecord in amRecords:
        areAllFilesAvailableAndReady = True

        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]
        filesInCurrentFolder = []

        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception:
            # The job folder may not exist (yet); treat it as empty.
            pass

        # BUGFIX: filesInAMRecord holds full paths while os.listdir returns
        # bare names, so the original membership test compared a basename
        # against full paths and flagged nearly every file as unknown.
        # Compare basenames instead.
        expectedBaseNames = [os.path.basename(f) for f in filesInAMRecord]
        isThereAnUnknownFilePresent = any(
            name not in expectedBaseNames for name in filesInCurrentFolder)

        if isThereAnUnknownFilePresent:
            # TODO: report this as an error instead of only logging it.
            logging.debug('Unknown files are present')

        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' %
                              (lastComponent))
                areAllFilesAvailableAndReady = False

        if not areAllFilesAvailableAndReady:
            logging.debug('Not all of the files are staged yet')
            continue

        canLockAllRecords = True

        data = datastore.recordsForUUID(amRecord.uuid)

        for record in data:

            filePath = record.fileName

            # BUGFIX: close the handle in a finally block -- the original
            # leaked the descriptor whenever portalocker.lock() raised.
            fileToCheck = None
            try:
                fileToCheck = open(filePath, 'rb')
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                logging.debug(
                    'Acquire File: proceeding to update the file status knowing that no one else is using it...'
                )
            except Exception:
                logging.debug(
                    'Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False
            finally:
                if fileToCheck is not None:
                    fileToCheck.close()

        if not canLockAllRecords:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:

            key_id = record.id
            filePath = record.fileName
            batchName = record.batchName
            pathStructureName = record.pathStructureName

            workingPath = configurationOptions().pathStructureWithName(
                pathStructureName)['workingBox']

            # Prepend the job name so batch members stay identifiable once
            # they share the working box with other files.
            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath),
                                        proposedBatchName)

            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                # Unlikely: an already-prefixed duplicate is present.
                # BUGFIX: use str(e) -- e.message is deprecated, empty for
                # many exception types, and absent entirely on Python 3.
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(
                    filePath) + " " + str(e)
                logging.debug(info)
                sendFailureEmail(info)
                continue

            # Move the file from the inbox into the working box.
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Esempio n. 13
0
    def on_created(self, event):
        '''
        Watchdog handler invoked when a file or folder appears in the
        watched inbox.

        Folders: a top-level folder is treated as an Archive Manager job.
        Its AM data is fetched and saved to the amjobs table (columns id,
        data, complete; the data column holds the JSON from which the job's
        files can be retrieved) and a UUID marker file is written into the
        folder.  Nested folders are rejected.

        Files: a top-level file is queued as a single-file job; a file one
        level deep is queued as a member of the batch named by its parent
        folder.  Thumbs.db/.DS_Store and UUID marker files are ignored;
        deeper nesting is rejected.  Failures are logged and emailed.
        '''
        if os.path.isdir(os.path.abspath(event.src_path)):
            info = "Created Folder: " +  event.src_path + " " + str(getsizeFolder(event.src_path))
            logging.debug(info)

            try:
                # First path component below the inbox is the dropped folder
                # (assumed to be the Archive Manager job number).
                droppedFolder = event.src_path.split(self.pathStructure['inBox'])[1].split(os.sep)[1]
                pathComponents = [elem for elem in droppedFolder.split(os.sep) if elem != '']

                if len(pathComponents) == 1:
                    info = "will add " +  droppedFolder +  " to path"
                    logging.debug(info)

                    # Fetch the Archive Manager data for this job number; if
                    # it is invalid, record the job as unknown and alert.
                    amDataAsString = jsonStringForAMNumber(droppedFolder)
                    if isAMDataValid(amDataAsString) == False:
                        self.dataStore.addAndMarkArchiveManagerJobToDataBaseAsUnkown(droppedFolder, event.src_path)
                        errorString = '''A folder was added to the Decrypt Path %s for which no Archive Manager Data was found. Check the name of the folder that was dropped and make sure that the Archive Manager request exists and that the Archive Manager is accessible. Files added to this folder will not be Decrypted until the error is resolved.'''
                        errorString = errorString % self.pathStructure['inBox']
                        raise Exception(errorString)

                    # Drop a UUID marker file into the folder so later stages
                    # can associate its files with this job.
                    uuid = createFileWithUUIDatPath(event.src_path)
                    self.dataStore.addArchiveManagerJobToDataBaseWithUUID(droppedFolder, amDataAsString, event.src_path, uuid)

                elif len(pathComponents) > 1:
                    logging.debug('This folder path is nested and will not be accepted')
                    raise Exception('failed to get data from server')

            except Exception as e:
                # NOTE(review): e.message is Python-2-only and may be empty
                # for some exception types -- confirm before porting.
                info = e.message
                logging.debug(info)
                sendFailureEmail(info)

        else:
            #file

            try:
                # Path of the file relative to the inbox, split into parts
                # to determine how deeply it is nested.
                droppedFile = event.src_path.split(self.pathStructure['inBox'])[1]
                pathComponents = [elem for elem in droppedFile.split(os.sep) if elem != '']

                if os.path.basename(event.src_path) in ['Thumbs.db', '.DS_Store']:
                    # OS metadata files -- ignore.
                    pass
                elif os.path.basename(event.src_path).startswith('UUID_'):
                    # Our own UUID marker files -- ignore.
                    pass
                elif len(pathComponents) == 1:
                    #single file
                    pathToAdd = pathComponents[0]
                    self.dataStore.addFilePathToDataBaseStoreWithType(os.path.abspath(event.src_path), self.pathStructure['watchType'], self.pathStructure['name'])
                    info = "Created: " +  pathToAdd + " " + str(os.path.getsize(event.src_path))
                    logging.debug(info)
                elif len(pathComponents) == 2:
                    #ADD BATCH FLAG AND AM FOLDER NAME
                    # One level deep: the parent folder names the batch job.
                    batchName = pathComponents[0]
                    self.dataStore.addBatchFilePathToDataBaseStoreWithType(os.path.abspath(event.src_path), self.pathStructure['watchType'], self.pathStructure['name'], batchName)
                    info = "Created File: " +  event.src_path + " " + str(os.path.getsize(event.src_path))
                    logging.debug(info)
                    info = "will add " +  str(pathComponents) +  " to path"
                    logging.debug(info)
                else:
                    raise Exception('This file path is nested OR incomplete and will not be accepted')
            except Exception as e:
                #GENERATE ERROR EMAIL
                # NOTE(review): e.message is Python-2-only -- confirm before
                # porting.
                info = e.message
                logging.debug(info)
                sendFailureEmail(info)
Esempio n. 14
0
def encrypt(dbPath):
	'''
	Poll the database at dbPath every 5 seconds and MediaSeal-encrypt every
	record reported ready by recordsReadyToEncrypt().  Output goes to the
	path structure's interim box; failures are emailed and the source file
	is moved to the error box.  Runs forever; returns only if dbPath is
	missing.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0  # drives the periodic heartbeat log only

	while True:
		sleep(5)

		# heartbeat roughly every 10 polls (~50s)
		if loopcount % 10 == 0:
			logging.debug('Encryptor Process is alive')
		loopcount += 1

		data = datastore.recordsReadyToEncrypt()
		for record in data:
			logging.debug(record)

			key_id = record.id
			filePath = record.fileName
			pathStructureName = record.pathStructureName

			if not os.path.exists(filePath):
				logging.debug('Encryptor: will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
			else:
				options = configurationOptions()
				currentPathStructure = options.pathStructureWithName(pathStructureName)
				encryptionErrorPath = currentPathStructure['errorBox']
				encryptionInterimPath = currentPathStructure['interimBox']

				encryptedFilePath = os.path.join(encryptionInterimPath, os.path.basename(filePath))
				encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				# assume success; downgraded below on error
				nextStatusValue = datastore.operationCompleteStatusCode()

				options.inputPath = filePath
				options.destinationPath = os.path.dirname(encryptedFilePath)

				##UPDATE THAT ENCRYPTION STARTS HERE
				datastore.updateRecordStatusWithOperationStart(encryptionStart, key_id)

				message = 'Encryptor: encrypting file ' + filePath
				logging.debug(message)

				#there is a bug with MediaSeal when encrypting an encrypted file,
				#this checks for this so that MediaSeal doesn't blow away the file.
				# A shared lock is held on the source for the duration of the
				# encryption; -7 is the "never ran" sentinel return code.
				returnCode = -7
				fileToEncrypt = None
				try:
					fileToEncrypt = open(filePath, 'rb')
					portalocker.lock(fileToEncrypt, portalocker.LOCK_SH)
					returnCode = singleShotEncryptor(options)
				except Exception as e:
					logging.debug('unable to lock file')

				# close regardless of whether the lock/encrypt succeeded
				if fileToEncrypt is not None:
					fileToEncrypt.close()

				message = 'Encryptor: encrypted file with return code ' +  str(returnCode)
				logging.debug(message)

				encryptionStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#try again should the connection be bad
				# NOTE(review): the retry resets encryptionStart *after* the
				# second attempt and leaves encryptionStop from before it, so
				# the recorded window can be inverted -- confirm intent.
				if returnCode == 2:
					sleep(5)
					returnCode = singleShotEncryptor(options)
					encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#as we are encrypting single files, we can leave this logic the same
				if returnCode != 0:
					info = "There was a problem encrypting " + filePath + ". Encountered Error Code: " + str(returnCode) + ". The file will be moved to the path's Error box: " + encryptionErrorPath
					sendFailureEmail(info)

					# sentinel timestamps mark the failed attempt
					nextStatusValue = datastore.operationFailedStatusCode()
					encryptionStart = datetime.datetime(2000,1,1)
					encryptionStop = datetime.datetime(2000,1,1)

					# move the failed source aside unless it is already in the error box
					if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(encryptionErrorPath):
						logging.debug('moving file to error path')
						if os.path.exists(encryptionErrorPath):
							try:
								newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, encryptionErrorPath)
							except Exception as e:
								logging.debug('Encryptor: Error moving file')

							# NOTE(review): this status is assigned even when
							# the move above succeeded -- confirm intent.
							nextStatusValue = datastore.errorMovingFileStatusCode()
						else:
							logging.debug('Encryptor: encryptionErrorPath doesnt exist')
							nextStatusValue = datastore.errorPathDoesntExistStatusCode()

				datastore.updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(nextStatusValue, encryptedFilePath, encryptionStart, encryptionStop, key_id)
Esempio n. 15
0
def processRecordsReadyToBeHashed(data, datastore):

    logging = DefaultLogger()

    for record in data:
        logging.debug(record)

        key_id = record.id
        sourceFilePath = record.fileName
        filePath = record.operationFileName
        recordOperationType = record.operationType
        pathStructureName = record.pathStructureName
        isBatch = record.isBatch
        batchName = record.batchName

        currentPathStructure = configurationOptions().pathStructureWithName(
            pathStructureName)
        finalPath = currentPathStructure['outBox']
        finalOriginalDestinationPath = currentPathStructure['originalBox']
        errorPath = currentPathStructure['errorBox']

        if not os.path.exists(filePath):
            # if the processed file doesn't exist, then move update the record and move to the error box
            # ADD LOGIC FOR BATCH PROCESSING
            logging.debug(
                'PostProcess: Will update record status as the encrypted file does not exist'
            )
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                sourceFilePath, errorPath)
            datastore.updateRecordAsMissingWithFileNameAndID(newPath, key_id)
            continue

        #CALCULATE HASH
        startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        datastore.updateRecordWithReHashStart(startTime, key_id)
        hashString = 'NO_HASH'

        #only hash files being decrypyted
        if recordOperationType == 'Decrypt':
            try:
                fileToHash = open(filePath, 'rb')
                logging.debug('PostProcess: locked file to calculate hash...')
                portalocker.lock(fileToHash, portalocker.LOCK_SH)
                hashString = hashForFile(fileToHash)
                logging.debug('PostProcess Hasher: unlocking file...')
                fileToHash.close()
            except Exception as e:
                hashString = 'HASH_GEN_ERROR'
        else:
            hashString = "NO_HASH_FOR_ENCRYPTED_FILES"

        endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        #ONLY DECRYPTED FILES' HASH IS CHECKED
        didChecksumFail = False
        checkSumErrorString = None

        if recordOperationType == 'Decrypt':
            fileBaseName = os.path.basename(filePath)
            if isBatch:
                fileBaseName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]

            daisyNumber = getDaisyNumber(fileBaseName)

            try:
                errorRecordStatus = 0
                if daisyNumber == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Daisy Number for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.daisyEntryNotFoundStatusCode(
                    )
                    raise Exception(errorString)

                originalChecksum = DaisyMetadataLookup(
                    daisyNumber).checksumForFile(fileBaseName)

                if originalChecksum == None:
                    errorString = 'There was an error Decrypting the file: ' + fileBaseName + '.\n'
                    errorString = errorString + 'Unable to retrieve Checksum for ' + filePath + ' ' + batchName
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumLookupFailedStatusCode(
                    )
                    raise Exception(errorString)

                if originalChecksum.upper() != hashString.upper():
                    errorString = 'Checksums do not match for file ' + filePath + '\n'
                    errorString = errorString + ' ' + batchName + " expected the checksum: " + originalChecksum + '\n'
                    errorString = errorString + " but found this checksum instead:" + hashString
                    logging.debug(errorString)
                    errorRecordStatus = datastore.checksumComparisonFailedStatusCode(
                    )
                    raise Exception(errorString)

            except Exception as checksumException:
                #we have an error, so we must create a new folder in the error path
                #if the file is non-batch, then
                logging.debug(
                    'PostProcess: The checksum failed. Please see the appropriate Error Box'
                )
                checkSumErrorString = 'There was a checksum error.' + '\n' + checksumException.message
                didChecksumFail = True

            #If the file failed a checksum and is not a bacth file, then move it to the error box
            if didChecksumFail == True and isBatch == False:
                errorPathInformation = ''
                try:
                    logging.debug(
                        'PostProcess: creating a Decrypted Checksum folder')
                    errorDestination = createSafeFolderInDestinationFolder(
                        errorPath, 'DECRYPTED_CHECKSUM_ERROR')
                    try:
                        info = 'Moving the file that errored into the folder at ' + errorDestination
                        logging.debug(info)
                        shutil.move(
                            filePath,
                            os.path.join(errorDestination, fileBaseName))
                        errorPathInformation = info
                    except Exception as e:
                        info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + fileBaseName + ' to ' + errorDestination
                        logging.debug(info)
                except Exception as e:
                    info = 'PostProcess: An error occurred when moving the decrypted file in to the Error box'
                    logging.debug(info)

                #THEN MOVE THE ENCRYPTED FILE ASIDE TO THE ERROR BOX
                try:
                    info = 'Moving  the source file into the error box at ' + errorPath
                    logging.debug(info)
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        sourceFilePath, errorPath)
                    errorPathInformation = errorPathInformation + '\n' + info
                except Exception as e:
                    info = "PostProcess: " + e.message + ' an error occurred moving the file: ' + sourceFilePath
                    logging.debug(info)

                datastore.updateRecordStatusWithID(errorRecordStatus, key_id)
                info = checksumException.message + '\n' + errorPathInformation
                logging.debug(info)
                sendFailureEmail(info)
                continue

        #Lets now address the batch decrypted files

        newPath = filePath
        success = False

        if isBatch == True and recordOperationType == 'Decrypt':
            #create the destination folder for the Archive Manager Job
            amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
            if amRecord is None:
                #This should not happen as we don't even allow for the logic to proceed to this point without
                #a valid Archive Manager Record
                info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                info = info + 'This error should not happen. Please check ' + os.path.dirname(
                    filePath) + '\n'
                info = info + 'The files will need to be manually removed from the Decryption Queue.'
                logging.debug(info)
                sendFailureEmail(info)
                continue

            if didChecksumFail == True:
                #add checksum error string to archive manager job
                amRecord.errorString = amRecord.errorString + '\n' + checkSumErrorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            #create the new folder in interim where we will push all of the batch files
            destinationAMFolder = os.path.join(os.path.dirname(filePath),
                                               batchName)
            if not os.path.exists(destinationAMFolder):
                try:
                    os.mkdir(destinationAMFolder)
                except OSError as e:
                    pass

            #get the file name, strip leading archive manager number
            originalFileName = os.path.basename(filePath)
            if isBatch == True:
                originalFileName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]

            #this is where we will move the interim file, a new folder with its original name
            proposedAMPath = os.path.join(destinationAMFolder,
                                          originalFileName)

            #at this point the file should be in the a folder named after the batch
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFile(
                    filePath, proposedAMPath)
            except Exception as e:
                info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (
                    filePath, batchName)
                sendFailureEmail(info)
                continue

            if os.path.basename(originalFileName) != os.path.basename(newPath):
                #there was a collision, there really is no reason why this should happen, but lets account for it
                errorString = 'For some reason, there already exists a file in %s labeled %s' % (
                    destinationAMFolder, originalFileName) + '\n'
                amRecord.errorString = amRecord.errorString + '\n' + errorString
                datastore.updateArchiveManagerJobErrorString(
                    amRecord, amRecord.errorString)

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)
            currentFiles = visibleFilesInFolder(destinationAMFolder)

            amPath = amRecord.amPath
            filesInJob = amRecord.allFilesInRecord()

            #are we finished, are all the files in place or the batch job?
            try:
                areAllFilesInPlace = True
                for nFile in filesInJob:
                    if nFile not in currentFiles:
                        areAllFilesInPlace = False

                if areAllFilesInPlace == False:
                    continue

                logging.debug('All files are in place')
                try:
                    #remove old source folder
                    logging.debug('PostProcess: removing original inbox')
                    shutil.rmtree(amPath)
                except OSError as e:
                    info = "PostProcess: " + e.message
                    logging.debug(info)
                    info = 'There was a problem removing the folder %s from the inbox after decrypting all of the files in the job.' % (
                        amPath)
                    sendFailureEmail(info)

                #refresh the record
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord is None:
                    #This should not happen as we don't even allow for the logic to proceed to this point without
                    #a valid Archive Manager Record
                    info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
                    info = info + 'This error should not happen. Please check ' + destinationAMFolder + '\n'
                    info = info + 'The files will need to be manually removed from the Decryption Queue.'
                    logging.debug(info)
                    sendFailureEmail(info)
                    continue

                #if there is an error, the redirect to the error box
                if amRecord.errorString != '':
                    finalPath = errorPath
                    #move the error files into a folder that indicates they are errors, it will live in the error box
                    try:
                        if datastore.updateArchiveManagerJobAsErrored(
                                amRecord) == True:
                            logging.debug(
                                'Job has finished, but there were some errors')
                            logging.debug('PostProcess: will send email')
                            info = 'Job %s has some errors! Please see the ErrorBox at %s' % (
                                batchName, errorPath)
                            info = info + '\n' + amRecord.errorString
                            sendFailureEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                        errDirname = os.path.dirname(destinationAMFolder)
                        errBasename = os.path.basename(
                            destinationAMFolder) + '_DECRYPTED_ERROR'
                        os.rename(destinationAMFolder,
                                  os.path.join(errDirname, errBasename))
                        destinationAMFolder = os.path.join(
                            errDirname, errBasename)
                        # shutil.move(destinationAMFolder, errorPath)
                        pathAfterSafelyMovingFolderToDestinationFolder(
                            destinationAMFolder, errorPath)

                    except Exception as e:
                        info = 'An error occurred when moving the errored files to %s.' % (
                            errorPath, )
                        logging.debug(info)
                        sendFailureEmail(info)
                else:
                    #No errors, move the files to the appropriate place
                    print "No Errors finalPath", finalPath
                    try:
                        logging.debug(
                            'PostProcess: moving archive mananger folder to final destination'
                        )
                        if os.path.exists(
                                os.path.join(
                                    finalPath,
                                    os.path.basename(destinationAMFolder))):
                            logging.debug(
                                'PostProcess: collision moving to duplicate box'
                            )
                            altPath = pathAfterSafelyMovingFileToDestinationFolder(
                                destinationAMFolder, finalPath)
                        else:
                            shutil.move(destinationAMFolder, finalPath)

                        if datastore.updateArchiveManagerJobAsReadyToComplete(
                                amRecord) == True:
                            logging.debug(
                                'PostProcess: job is ready to complete')
                            logging.debug(
                                'PostProcess: moving files and sending email')
                            info = 'Job %s is complete! All of the files are decrypted and have appropriate matching checksums.' % (
                                batchName)
                            sendSuccessEmail(info)
                        else:
                            logging.debug('PostProcess: Error saving Job')

                    except OSError as e:
                        #again, I am accounting for this error, I just don't know why I would ever encounter a situation like this
                        info = 'There was a problem moving the folder %s to the outbox. You will have to move the file manually.' % (
                            destinationAMFolder)
                        info = info + " " + e.message
                        sendFailureEmail(info)
                        logging.debug(info)
                        continue

            except Exception as e:
                info = 'An error occurred. Please see check the Decryption Queue for job %s. See Error: %s' % (
                    batchName, e.message)
                logging.debug(info)
                sendFailureEmail(info)

        else:
            #LAST CASE FOR SINGLE MODE FILES LIKE ENCRYPTION AND SINGLE MODE DECRYPTION
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, finalPath)

            if not os.path.exists(newPath):
                logging.debug('PostProcess: Error moving file')
                continue

            logging.debug(
                'PostProcess: Will update record status with Hash string and times'
            )

            success = datastore.updateRecordWithFinalEncryptedPathAndHashForStartTimeAndEndTime(
                newPath, hashString, startTime, endTime, key_id)

            if success == True:
                # move original file to original box
                try:
                    newPath = pathAfterSafelyMovingFileToDestinationFolder(
                        sourceFilePath, finalOriginalDestinationPath)
                except Exception as e:
                    logging.debug(
                        'There was an error moving the file into place')
                    info = 'There was an error moving file %s into the outbox at %s' % (
                        sourceFilePath, finalOriginalDestinationPath)
                    sendFailureEmail(info)

                if configurationOptions().shouldDeleteOriginal == True:
                    try:
                        os.remove(newPath)
                    except OSError as e:
                        logging.debug('PostProcess: Unable to delete the file',
                                      newPath)
# Esempio n. 16
# 0
def postprocess(dbPath):
    '''
	This is the post process module
	'''

    if not os.path.exists(dbPath):
        logging.debug('PreProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)

    loopcount = 0

    while True:
        sleep(5)

        if loopcount % 10 == 0:
            logging.debug('PostProcess is alive')
        loopcount += 1

        #calculate checksums on decrypted files
        data = datastore.recordsForReHashing()

        processRecordsReadyToBeHashed(data, datastore)

        #delete associated files as the job was successful
        amRecords = datastore.archiveManagerJobsReadyToComplete()
        for amRecord in amRecords:
            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                recordPath = record.fileName
                if configurationOptions().shouldDeleteOriginal == True:
                    try:
                        os.remove(recordPath)
                    except OSError as e:
                        info = 'PostProcess: Unable to delete the file %s' % (
                            recordPath, )
                        logging.debug(info)
            datastore.updateArchiveManagerJobAsComplete(amRecord)

        #move the associated files to the error box as the job had problems
        amRecords = datastore.archiveManagerJobsThatErrored()
        for amRecord in amRecords:
            logging.debug('performing clean up with ' + amRecord.amNumber)

            batchName = amRecord.amNumber
            destinationAMFolder = ''
            errorPath = ''

            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                pathStructureName = record.pathStructureName
                filePath = record.fileName
                currentPathStructure = configurationOptions(
                ).pathStructureWithName(pathStructureName)
                errorPath = currentPathStructure['errorBox']
                print filePath

                destinationAMFolder = os.path.join(os.path.dirname(filePath),
                                                   batchName)
                print 'This is where the working files will go.', destinationAMFolder

                if not os.path.exists(destinationAMFolder):
                    try:
                        os.mkdir(destinationAMFolder)
                    except OSError as e:
                        pass

                originalFileName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]
                proposedAMPath = os.path.join(destinationAMFolder,
                                              originalFileName)

                try:
                    # newPath = pathAfterSafelyMovingFileToDestinationFile(filePath, proposedAMPath)
                    print filePath, proposedAMPath
                    shutil.move(filePath, proposedAMPath)
                except Exception as e:
                    info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (
                        filePath, batchName)
                    sendFailureEmail(info)
                    continue

                currentFiles = os.listdir(destinationAMFolder)
                filesInJob = amRecord.allFilesInRecord()

                areAllFilesInPlace = True
                for nFile in filesInJob:
                    if nFile not in currentFiles:
                        areAllFilesInPlace = False
                if areAllFilesInPlace == True:
                    print "moving files to the error path"
                    try:
                        pathAfterSafelyMovingFolderToDestinationFolder(
                            destinationAMFolder, errorPath)
                    except Exception as e:
                        info = 'PostProcess: Unable to move the file %s' % (
                            filePath, )
                        logging.debug(info)
                        info = 'There was an error moving the folder %s into the outbox at %s' % (
                            destinationAMFolder, errorPath)
                        info = info + '\n' + 'This will need to be addressed manually'
                        sendFailureEmail(info)
                        continue

            datastore.updateArchiveManagerJobAsComplete(amRecord)