Example #1
0
def preprocess(dbPath):
    '''
	This is a preprocess module
	'''
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('PreProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    loopcount = 0

    while True:
        sleep(5)

        if loopcount % 10 == 0:
            logging.debug('PreProcess is alive')
        loopcount += 1

        data = datastore.recordsForHashing()
        for record in data:
            logging.debug(record)

            key_id = record.id
            filePath = record.fileName

            if not os.path.exists(filePath):
                logging.debug(
                    'PreProcess: Will update record status as the file no longer exists'
                )
                datastore.updateRecordAsMissingWithID(key_id)
                continue

            try:
                logging.debug('PreProcess: locking file to calculate hash...')
                ##UPDATE HASH OPERATION START HERE
                startTime = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S")
                datastore.updateRecordWithHashStart(startTime, key_id)

                fileToHash = open(filePath, 'rb')
                portalocker.lock(fileToHash, portalocker.LOCK_SH)
                hashString = "NO_OP"  #hashForFile(fileToHash)
                endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                fileToHash.close()

                logging.debug('PreProcess: unlocking file...')
                logging.debug(
                    'PreProcess: Will update record status with Hash string and times'
                )

                datastore.updateRecordWithHashForStartTimeAndEndTime(
                    hashString, startTime, endTime, key_id)

            except Exception as e:
                info = 'PreProcess: There was an error when calculating the hash for file: ' + os.path.basename(
                    filePath) + ' ' + e.message
                sendFailureEmail(info)
                logging.error(e.message)
Example #2
0
def preprocess(dbPath):
	'''
	This is a preprocess module
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('PreProcess: can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('PreProcess is alive')
		loopcount += 1

		data = datastore.recordsForHashing()
		for record in data:
			logging.debug(record)

			key_id = record.id
			filePath = record.fileName

			if not os.path.exists(filePath):
				logging.debug('PreProcess: Will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
				continue

			try:
				logging.debug('PreProcess: locking file to calculate hash...')
				##UPDATE HASH OPERATION START HERE
				startTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				datastore.updateRecordWithHashStart(startTime, key_id)

				fileToHash = open(filePath, 'rb')
				portalocker.lock(fileToHash, portalocker.LOCK_SH)
				hashString = "NO_OP"#hashForFile(fileToHash) 
				endTime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				fileToHash.close()

				logging.debug('PreProcess: unlocking file...')
				logging.debug('PreProcess: Will update record status with Hash string and times')

				datastore.updateRecordWithHashForStartTimeAndEndTime(hashString, startTime, endTime, key_id)

			except Exception as e:
				info = 'PreProcess: There was an error when calculating the hash for file: ' + os.path.basename(filePath) + ' ' + e.message
				sendFailureEmail(info)
				logging.error(e.message)
Example #3
0
def checkSingleFiles(dbPath):
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Acquire File: can\'t find database at path')
		return
	
	datastore = DataStore(dbPath)
	data = datastore.recordsForVerifying()

	for record in data:

		key_id 				= record.id
		filePath 			= record.fileName
		recordSize 			= int(record.fileSize)
		dateModifiedString 	= record.dateModified
		pathStructureName 	= record.pathStructureName
		operationType		= record.operationType
		isBatch				= record.isBatch
		batchName			= record.batchName

		dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
		timeDifference = datetime.datetime.now() - dateLastModified

		#This can change with an if/else should I decide I want to put temp files to be decrypted in another place
		sourcePath = configurationOptions().pathStructureWithName(pathStructureName)['inBox']
		workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

		if timeDifference.seconds < verificationWaitTime:
			continue

		lastSize = recordSize
		currentSize = 0

		if not os.path.exists(filePath):
			logging.debug('Acquire File: Will update record status as the file no longer exists')
			datastore.updateRecordAsMissingWithID(key_id)
			continue

		currentSize = os.path.getsize(filePath)

		if lastSize != currentSize:
			logging.debug(record)
			logging.debug('Acquire File: attempting db modify as file size has changed...')
			datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
			continue

		if currentSize == 0:
			continue
			# if the current size is zero, then continue until it isn't or never will be
			# its likely the file has been queued to copy but no data has been moved yet (actual OSX case) 
			
		logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')

		try:
			fileToCheck = open(filePath, 'rb')
			portalocker.lock(fileToCheck, portalocker.LOCK_EX)
			fileToCheck.close()
			logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
		except Exception as e:
			logging.debug('Acquire File: unable to lock file as it is likely in use')
			continue

		if datastore.doesTheFilePathExistElseWhereInThePathStructure(filePath, operationType, pathStructureName) == True:
			duplicatePath = configurationOptions().pathStructureWithName(pathStructureName)['duplicateBox']
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, duplicatePath)
			datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
			continue

		newPath = filePath
		#Only update 
		if isBatch == 1 and operationType == 'Decrypt':
			amRecord = None
			uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

			if uuidString == None:
				#if I can't resolve the UUID, then resovle it though an AM Record
				#Does file's Archive Manager have data associated with it
				amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
				if amRecord == None:
					info = "Acquire File: Archive Manager data doesn't exist for " + filePath
					info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
					logging.debug(info)
					datastore.updateRecordStatusWithID(datastore.noArchiveManagerDataExistsForRecord(), key_id)
					continue
			else:
				logging.debug('Updating record %s with UUID %s' % (filePath, uuidString))
				amRecord = datastore.archiveManagerJobsTableRecordWithUUID(uuidString)
				datastore.updateRecordAWithBatchUUIDReference(uuidString, key_id)
		else:
			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception as e:
				logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Example #4
0
def checkSingleFiles(dbPath):
    logging = DefaultLogger()

    if not os.path.exists(dbPath):
        logging.debug('Acquire File: can\'t find database at path')
        return

    datastore = DataStore(dbPath)
    data = datastore.recordsForVerifying()

    for record in data:

        key_id = record.id
        filePath = record.fileName
        recordSize = int(record.fileSize)
        dateModifiedString = record.dateModified
        pathStructureName = record.pathStructureName
        operationType = record.operationType
        isBatch = record.isBatch
        batchName = record.batchName

        dateLastModified = datetime.datetime.strptime(dateModifiedString,
                                                      '%Y-%m-%d %H:%M:%S')
        timeDifference = datetime.datetime.now() - dateLastModified

        #This can change with an if/else should I decide I want to put temp files to be decrypted in another place
        sourcePath = configurationOptions().pathStructureWithName(
            pathStructureName)['inBox']
        workingPath = configurationOptions().pathStructureWithName(
            pathStructureName)['workingBox']

        if timeDifference.seconds < verificationWaitTime:
            continue

        lastSize = recordSize
        currentSize = 0

        if not os.path.exists(filePath):
            logging.debug(
                'Acquire File: Will update record status as the file no longer exists'
            )
            datastore.updateRecordAsMissingWithID(key_id)
            continue

        currentSize = os.path.getsize(filePath)

        if lastSize != currentSize:
            logging.debug(record)
            logging.debug(
                'Acquire File: attempting db modify as file size has changed...'
            )
            datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(
                currentSize,
                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
            continue

        if currentSize == 0:
            continue
            # if the current size is zero, then continue until it isn't or never will be
            # its likely the file has been queued to copy but no data has been moved yet (actual OSX case)

        logging.debug(
            'Acquire File: attempting to lock the file to see if I own the file yet...'
        )

        try:
            fileToCheck = open(filePath, 'rb')
            portalocker.lock(fileToCheck, portalocker.LOCK_EX)
            fileToCheck.close()
            logging.debug(
                'Acquire File: proceeding to update the file status knowing that no one else is using it...'
            )
        except Exception as e:
            logging.debug(
                'Acquire File: unable to lock file as it is likely in use')
            continue

        if datastore.doesTheFilePathExistElseWhereInThePathStructure(
                filePath, operationType, pathStructureName) == True:
            duplicatePath = configurationOptions().pathStructureWithName(
                pathStructureName)['duplicateBox']
            newPath = pathAfterSafelyMovingFileToDestinationFolder(
                filePath, duplicatePath)
            datastore.updateRecordAsDuplicateWithNewPath(newPath, key_id)
            continue

        newPath = filePath
        #Only update
        if isBatch == 1 and operationType == 'Decrypt':
            amRecord = None
            uuidString = fileNameForUUIDFileWithPath(os.path.dirname(filePath))

            if uuidString == None:
                #if I can't resolve the UUID, then resovle it though an AM Record
                #Does file's Archive Manager have data associated with it
                amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
                if amRecord == None:
                    info = "Acquire File: Archive Manager data doesn't exist for " + filePath
                    info = info + " " + "Marking file as having no AM Data. File will not be moved through the processing queue."
                    logging.debug(info)
                    datastore.updateRecordStatusWithID(
                        datastore.noArchiveManagerDataExistsForRecord(),
                        key_id)
                    continue
            else:
                logging.debug('Updating record %s with UUID %s' %
                              (filePath, uuidString))
                amRecord = datastore.archiveManagerJobsTableRecordWithUUID(
                    uuidString)
                datastore.updateRecordAWithBatchUUIDReference(
                    uuidString, key_id)
        else:
            #at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            #this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception as e:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Example #5
0
def decrypt(dbPath):
	'''
	This process examines the database pointed to by dbPath. It 
	Looks for any records which have status 2 and has had a hash value calculated for it.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Decryptor: can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('Decryptor Process is alive')
		loopcount += 1

		data = datastore.recordsReadyToDecrypt()
		for record in data:
			logging.debug(record)

			key_id 			   = record.id
			filePath 		   = record.fileName
			pathStructureName  = record.pathStructureName	
			isBatch			   = record.isBatch
			batchName		   = record.batchName

			if not os.path.exists(filePath):
				logging.debug('Decryptor: will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
			else:
				options = configurationOptions()
				currentPathStructure = options.pathStructureWithName(pathStructureName)
				decryptionErrorPath = currentPathStructure['errorBox']
				decryptionInterimPath = currentPathStructure['interimBox']
				options.inputPath = filePath

				decryptedFilePath = os.path.join(decryptionInterimPath, os.path.basename(filePath))
				operationStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				nextStatusValue = datastore.operationCompleteStatusCode()

				message = 'Decryptor: decrypting file ' + filePath
				logging.debug(message)

				##UPDATE OPERATION START
				datastore.updateRecordStatusWithOperationStart(operationStart, key_id)

				args = [options.decryptorApplicationPath, filePath, decryptedFilePath]
				process = subprocess.Popen(args)
				out, err = process.communicate()
				returnCode = process.returncode

				message = 'Decryptor: decrypted file with return code ' +  str(returnCode)
				logging.debug(message)

				operationStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				if returnCode != 0:
					info = 'An error occurred with the Decryption operation when decrypting %s.' % (filePath)
					logging.debug(info)
					
					operationStart = datetime.datetime(2000,1,1)
					operationStop = datetime.datetime(2000,1,1)

					if isBatch == 0:
						
						nextStatusValue = datastore.operationFailedStatusCode()

						if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(decryptionErrorPath):
							logging.debug('moving file to error path')
							if os.path.exists(decryptionErrorPath):
								# shutil.move(filePath, decryptionErrorPath)
								# newPath = os.path.join(decryptionErrorPath, os.path.basename(filePath))
								newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, decryptionErrorPath)
								if not os.path.exists(newPath):
									logging.debug('Decryptor: Error moving file')
									nextStatusValue = datastore.errorMovingFileStatusCode()
							else:
								logging.debug('Decryptor: decryptionErrorPath doesnt exist')
								nextStatusValue = datastore.errorPathDoesntExistStatusCode()
					else:
						#don't move batch files, just update the batch's am errorString to reflect the problem
						#the file's checksum will fail
						#we don't update the batch file's status
						amRecord = datastore.recordWithNumberFromAMJobsTable(batchName)
						if amRecord == None:
							#This should not happen as we don't even allow for the logic to proceed to this point without
							#a valid Archive Manager Record
							info = 'An error occurred where no data was found for the Archive Manager job ' + batchName + '\n'
							info = info + 'This error should not happen. Please check ' + os.path.dirname(filePath) + '\n'
							info = info + 'The files will need to be manually removed from the Decryption Queue.'
							logging.debug(info)
							sendFailureEmail(info)
							continue
						
						errorString = 'A problem was encountered while decrypting %s.' % (filePath)
						errorString = errorString + 'The file\'s checksum will be calculated and compared against that in Daisy should the error have occurred ater the file was decrypted.'
						if amRecord.errorString != '':
							amRecord.errorString = amRecord.errorString + '\n' + errorString
						else:
							amRecord.errorString = errorString

						datastore.updateArchiveManagerJobErrorString(amRecord, amRecord.errorString)

				# we update the status value
				datastore.updateRecordStatusWithDecryptedFileNameAndStartAndEndTime(nextStatusValue, decryptedFilePath, operationStart, operationStop, key_id)
Example #6
0
def checkSingleFiles(dbPath):
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('Acquire File: can\'t find database at path')
		return
	
	datastore = DataStore(dbPath)
	data = datastore.recordsForVerifying()

	for record in data:

		key_id 				= record.id
		filePath 			= record.fileName
		recordSize 			= int(record.fileSize)
		dateModifiedString 	= record.dateModified

		dateLastModified = datetime.datetime.strptime(dateModifiedString, '%Y-%m-%d %H:%M:%S')
		timeDifference = datetime.datetime.now() - dateLastModified

		#This can change with an if/else should I decide I want to put temp files to be decrypted in another place
		sourcePath = configurationOptions().defaultPathStructure()['inBox']
		workingPath = configurationOptions().defaultPathStructure()['workingBox']

		if timeDifference.seconds < verificationWaitTime:
			continue

		lastSize = recordSize
		currentSize = 0

		if not os.path.exists(filePath):
			logging.debug('Acquire File: Will update record status as the file no longer exists')
			datastore.updateRecordAsMissingWithID(key_id)
			continue

		currentSize = os.path.getsize(filePath)

		if lastSize != currentSize:
			logging.debug(record)
			logging.debug('Acquire File: attempting db modify as file size has changed...')
			datastore.updateRecordWithCurrentSizeAndDateModifiedWithID(currentSize, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), key_id)
			continue

		if currentSize == 0:
			continue
			# if the current size is zero, then continue until it isn't or never will be
			# its likely the file has been queued to copy but no data has been moved yet (actual OSX case) 
			
		logging.debug('Acquire File: attempting to lock the file to see if I own the file yet...')

		try:
			fileToCheck = open(filePath, 'rb')
			portalocker.lock(fileToCheck, portalocker.LOCK_EX)
			fileToCheck.close()
			logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
		except Exception as e:
			logging.debug('Acquire File: unable to lock file as it is likely in use')
			continue

		#must test that file doesn't exist elsewhere in the path

		newPath = filePath
		try:
			newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
		except Exception as e:
			info = '''This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'''
			logging.debug(info)
			logging.debug('Acquire File: Error moving file')
			info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
			info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
			#SEND FAILURE EMAIL
			continue

		logging.debug('Acquire File: updating record file status and path....')
		datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Example #7
0
def encrypt(dbPath):
	'''
	This process examines the database pointed to by dbPath. It 
	Looks for any records which have status 2 and has had a hash value calculated for it.
	'''
	logging = DefaultLogger()

	if not os.path.exists(dbPath):
		logging.debug('can\'t find database at path')
		return

	datastore = DataStore(dbPath)
	loopcount = 0

	while True:
		sleep(5)

		if loopcount % 10 == 0:
			logging.debug('Encryptor Process is alive')
		loopcount += 1

		data = datastore.recordsReadyToEncrypt()
		for record in data:
			logging.debug(record)

			key_id = record.id
			filePath = record.fileName	
			pathStructureName = record.pathStructureName

			if not os.path.exists(filePath):
				logging.debug('Encryptor: will update record status as the file no longer exists')
				datastore.updateRecordAsMissingWithID(key_id)
			else:
				options = configurationOptions()
				currentPathStructure = options.pathStructureWithName(pathStructureName)
				encryptionErrorPath = currentPathStructure['errorBox']
				encryptionInterimPath = currentPathStructure['interimBox']

				encryptedFilePath = os.path.join(encryptionInterimPath, os.path.basename(filePath))
				encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				nextStatusValue = datastore.operationCompleteStatusCode()

				options.inputPath = filePath
				options.destinationPath = os.path.dirname(encryptedFilePath)

				##UPDATE THAT ENCRYPTION STARTS HERE
				datastore.updateRecordStatusWithOperationStart(encryptionStart, key_id)

				message = 'Encryptor: encrypting file ' + filePath
				logging.debug(message)

				#there is a bug with MediaSeal when encrypting an encrypted file,
				#this checks for this so that MediaSeal doesn't blow away the file.
				returnCode = -7
				fileToEncrypt = None
				try:
					fileToEncrypt = open(filePath, 'rb')
					portalocker.lock(fileToEncrypt, portalocker.LOCK_SH)
					returnCode = singleShotEncryptor(options)
				except Exception as e:
					logging.debug('unable to lock file')

				if fileToEncrypt is not None:
					fileToEncrypt.close()

				message = 'Encryptor: encrypted file with return code ' +  str(returnCode)
				logging.debug(message)

				encryptionStop = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#try again should the connection be bad
				if returnCode == 2:
					sleep(5)
					returnCode = singleShotEncryptor(options)
					encryptionStart = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

				#as we are encrypting single files, we can leave this logic the same
				if returnCode != 0:
					info = "There was a problem encrypting " + filePath + ". Encountered Error Code: " + str(returnCode) + ". The file will be moved to the path's Error box: " + encryptionErrorPath 
					sendFailureEmail(info)

					nextStatusValue = datastore.operationFailedStatusCode()
					encryptionStart = datetime.datetime(2000,1,1)
					encryptionStop = datetime.datetime(2000,1,1)

					if os.path.abspath(os.path.dirname(filePath)) != os.path.abspath(encryptionErrorPath):
						logging.debug('moving file to error path')
						if os.path.exists(encryptionErrorPath):
							try:
								newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, encryptionErrorPath)
							except Exception as e:
								logging.debug('Encryptor: Error moving file')
								
							nextStatusValue = datastore.errorMovingFileStatusCode()
						else:
							logging.debug('Encryptor: encryptionErrorPath doesnt exist')
							nextStatusValue = datastore.errorPathDoesntExistStatusCode()

				datastore.updateRecordStatusWithEncryptedFileNameAndStartAndEndTime(nextStatusValue, encryptedFilePath, encryptionStart, encryptionStop, key_id)