Example #1
0
def postprocess(dbPath):
	'''
	Post-process polling loop.

	Runs forever: every 5 seconds it (1) re-hashes decrypted files,
	(2) deletes source files for Archive Manager jobs that completed
	successfully, and (3) gathers the files of errored jobs into a
	per-batch folder and moves that folder into the error box.

	dbPath -- path to the datastore database; the function returns
	immediately if no file exists there.
	'''

	if not os.path.exists(dbPath):
		# fixed label: this is PostProcess, not PreProcess (copy-paste bug)
		logging.debug('PostProcess: can\'t find database at path')
		return

	datastore = DataStore(dbPath)

	loopcount = 0

	while True:
		sleep(5)

		# heartbeat roughly every 50 seconds (every 10th iteration)
		if loopcount % 10 == 0:
			logging.debug('PostProcess is alive')
		loopcount += 1

		#calculate checksums on decrypted files
		data = datastore.recordsForReHashing()

		processRecordsReadyToBeHashed(data, datastore)

		#delete associated files as the job was successful
		amRecords = datastore.archiveManagerJobsReadyToComplete()
		for amRecord in amRecords:
			dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
			for record in dataStoreRecords:
				recordPath = record.fileName
				if configurationOptions().shouldDeleteOriginal:
					try:
						os.remove(recordPath)
					except OSError:
						# best effort: log and continue so the job can still complete
						info = 'PostProcess: Unable to delete the file %s' % (recordPath,)
						logging.debug(info)
			datastore.updateArchiveManagerJobAsComplete(amRecord)

		#move the associated files to the error box as the job had problems
		amRecords = datastore.archiveManagerJobsThatErrored()
		for amRecord in amRecords:
			logging.debug('performing clean up with ' + amRecord.amNumber)

			batchName = amRecord.amNumber
			destinationAMFolder = ''
			errorPath = ''

			dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
			for record in dataStoreRecords:
				pathStructureName = record.pathStructureName
				filePath = record.fileName
				currentPathStructure = configurationOptions().pathStructureWithName(pathStructureName)
				errorPath = currentPathStructure['errorBox']
				# was a bare print; routed through the logger for consistency
				logging.debug(filePath)

				# group the job's files into a folder named after the batch
				destinationAMFolder = os.path.join(os.path.dirname(filePath), batchName)
				logging.debug('This is where the working files will go. %s' % (destinationAMFolder,))

				if not os.path.exists(destinationAMFolder):
					try:
						os.mkdir(destinationAMFolder)
					except OSError:
						# folder may already exist (race with a prior record); safe to ignore
						pass

				# strip the "<batchName>_" prefix that was prepended at intake
				originalFileName = os.path.basename(filePath).split((batchName + "_"))[1]
				proposedAMPath = os.path.join(destinationAMFolder, originalFileName)

				try:
					logging.debug('%s %s' % (filePath, proposedAMPath))
					shutil.move(filePath, proposedAMPath)
				except Exception:
					info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (filePath, batchName)
					sendFailureEmail(info)
					continue

				currentFiles = os.listdir(destinationAMFolder)
				filesInJob = amRecord.allFilesInRecord()

				# only move the batch folder once every expected file is present
				areAllFilesInPlace = True
				for nFile in filesInJob:
					if nFile not in currentFiles:
						areAllFilesInPlace = False
				if areAllFilesInPlace:
					logging.debug('moving files to the error path')
					try:
						pathAfterSafelyMovingFolderToDestinationFolder(destinationAMFolder, errorPath)
					except Exception:
						info = 'PostProcess: Unable to move the file %s' % (filePath,)
						logging.debug(info)
						info = 'There was an error moving the folder %s into the outbox at %s' % (destinationAMFolder, errorPath)
						info = info + '\n' + 'This will need to be addressed manually'
						sendFailureEmail(info)
						continue

			datastore.updateArchiveManagerJobAsComplete(amRecord)
Example #2
0
def checkArchiveManagerJobs(dbPath):
    '''
    Stage Archive Manager jobs that are ready to start.

    For each ready job: verify every expected file has arrived in the job
    folder, verify each file can be exclusively locked (i.e. nothing is
    still writing it), prepend the batch name to each file, move it into
    the working box, and mark its datastore record static at the new path.

    dbPath -- path to the datastore database.
    '''
    logging = DefaultLogger()

    datastore = DataStore(dbPath)
    amRecords = datastore.archiveManagerJobsReadyToStart()

    for amRecord in amRecords:
        areAllFilesAvailableAndReady = True

        recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
        filesInAMRecord = [x.fileName for x in recordsInAMRecord]
        filesInCurrentFolder = []

        try:
            filesInCurrentFolder = os.listdir(amRecord.amPath)
        except Exception:
            # the job folder may not exist yet; treat it as empty
            pass

        # record.fileName holds a full path while os.listdir yields basenames
        # (see the basename comparison below), so compare basenames here --
        # the original compared against full paths and flagged every file
        knownBaseNames = [os.path.basename(x) for x in filesInAMRecord]
        isThereAnUnknownFilePresent = False
        for currentFile in filesInCurrentFolder:
            if currentFile not in knownBaseNames:
                isThereAnUnknownFilePresent = True

        if isThereAnUnknownFilePresent:
            logging.debug('Unknown files are present')
            #report error

        for currentFile in filesInAMRecord:
            logging.debug('%s' % (currentFile))
            lastComponent = os.path.basename(currentFile)
            if lastComponent not in filesInCurrentFolder:
                logging.debug('The following file is not yet available: %s' %
                              (lastComponent))
                areAllFilesAvailableAndReady = False

        if not areAllFilesAvailableAndReady:
            logging.debug('Not all of the files are staged yet')
            continue

        canLockAllRecords = True

        data = datastore.recordsForUUID(amRecord.uuid)

        # a job is only staged once every one of its files can be locked
        for record in data:

            filePath = record.fileName

            try:
                fileToCheck = open(filePath, 'rb')
                portalocker.lock(fileToCheck, portalocker.LOCK_EX)
                fileToCheck.close()
                logging.debug(
                    'Acquire File: proceeding to update the file status knowing that no one else is using it...'
                )
            except Exception:
                logging.debug(
                    'Acquire File: unable to lock file as it is likely in use')
                canLockAllRecords = False

        if not canLockAllRecords:
            logging.debug('Can not lock all of the records yet')
            continue

        for record in data:

            key_id = record.id
            filePath = record.fileName
            pathStructureName = record.pathStructureName
            batchName = record.batchName

            newPath = filePath
            workingPath = configurationOptions().pathStructureWithName(
                pathStructureName)['workingBox']

            proposedBatchName = batchName + "_" + os.path.basename(filePath)
            proposedPath = os.path.join(os.path.dirname(filePath),
                                        proposedBatchName)

            #we prepend the job name to the file here as it belongs to a batch
            try:
                if os.path.exists(proposedPath):
                    raise Exception('file already exists')
                os.rename(filePath, proposedPath)
                filePath = proposedPath
            except Exception as e:
                #this is an unlikely occurrence; str(e) replaces the
                #Python-2-only e.message attribute
                info = 'There is a duplicate file in the queue for: ' + os.path.basename(
                    filePath) + " " + str(e)
                logging.debug(info)
                sendFailureEmail(info)
                continue

            #at this point, I need to subtract the file's main folder from the pathStructure['inBox']
            #this moves the file from the inbox to the working path
            try:
                newPath = pathAfterSafelyMovingFileToDestinationFolder(
                    filePath, workingPath)
            except Exception:
                logging.debug(
                    'This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved'
                )
                logging.debug('Acquire File: Error moving file')
                info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(
                    filePath)
                info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
                sendFailureEmail(info)
                continue

            logging.debug(
                'Acquire File: updating record file status and path....')
            datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Example #3
0
def checkArchiveManagerJobs(dbPath):
	'''
	Stage Archive Manager jobs that are ready to start.

	For each ready job: verify every expected file has arrived in the job
	folder, verify each file can be exclusively locked (i.e. nothing is
	still writing it), prepend the batch name to each file, move it into
	the working box, and mark its datastore record static at the new path.

	dbPath -- path to the datastore database.
	'''
	logging = DefaultLogger()

	datastore = DataStore(dbPath)
	amRecords = datastore.archiveManagerJobsReadyToStart()

	for amRecord in amRecords:
		areAllFilesAvailableAndReady = True

		recordsInAMRecord = datastore.recordsForUUID(amRecord.uuid)
		filesInAMRecord = [x.fileName for x in recordsInAMRecord]
		filesInCurrentFolder = []

		try:
			filesInCurrentFolder = os.listdir(amRecord.amPath)
		except Exception:
			# the job folder may not exist yet; treat it as empty
			pass

		# record.fileName holds a full path while os.listdir yields basenames
		# (see the basename comparison below), so compare basenames here --
		# the original compared against full paths and flagged every file
		knownBaseNames = [os.path.basename(x) for x in filesInAMRecord]
		isThereAnUnknownFilePresent = False
		for currentFile in filesInCurrentFolder:
			if currentFile not in knownBaseNames:
				isThereAnUnknownFilePresent = True

		if isThereAnUnknownFilePresent:
			logging.debug('Unknown files are present')
			#report error

		for currentFile in filesInAMRecord:
			logging.debug('%s' % (currentFile))
			lastComponent = os.path.basename(currentFile)
			if lastComponent not in filesInCurrentFolder:
				logging.debug('The following file is not yet available: %s' % (lastComponent))
				areAllFilesAvailableAndReady = False

		if not areAllFilesAvailableAndReady:
			logging.debug('Not all of the files are staged yet')
			continue

		canLockAllRecords = True

		data = datastore.recordsForUUID(amRecord.uuid)

		# a job is only staged once every one of its files can be locked
		for record in data:

			filePath = record.fileName

			try:
				fileToCheck = open(filePath, 'rb')
				portalocker.lock(fileToCheck, portalocker.LOCK_EX)
				fileToCheck.close()
				logging.debug('Acquire File: proceeding to update the file status knowing that no one else is using it...')
			except Exception:
				logging.debug('Acquire File: unable to lock file as it is likely in use')
				canLockAllRecords = False

		if not canLockAllRecords:
			logging.debug('Can not lock all of the records yet')
			continue

		for record in data:

			key_id = record.id
			filePath = record.fileName
			pathStructureName = record.pathStructureName
			batchName = record.batchName

			newPath = filePath
			workingPath = configurationOptions().pathStructureWithName(pathStructureName)['workingBox']

			proposedBatchName = batchName + "_" + os.path.basename(filePath)
			proposedPath = os.path.join(os.path.dirname(filePath), proposedBatchName)

			#we prepend the job name to the file here as it belongs to a batch
			try:
				if os.path.exists(proposedPath):
					raise Exception('file already exists')
				os.rename(filePath, proposedPath)
				filePath = proposedPath
			except Exception as e:
				#this is an unlikely occurrence; str(e) replaces the
				#Python-2-only e.message attribute
				info = 'There is a duplicate file in the queue for: ' + os.path.basename(filePath) + " " + str(e)
				logging.debug(info)
				sendFailureEmail(info)
				continue

			#at this point, I need to subtract the file's main folder from the pathStructure['inBox']
			#this moves the file from the inbox to the working path
			try:
				newPath = pathAfterSafelyMovingFileToDestinationFolder(filePath, workingPath)
			except Exception:
				logging.debug('This shouldn\'t happen as pathAfterSafelyMovingFileToDestinationFolder should create a unique name that avoids any collisions, otherwise the file has been moved')
				logging.debug('Acquire File: Error moving file')
				info = 'There was a problem moving the file into into the queue for: ' + os.path.basename(filePath)
				info = info + '\n' + 'This will require manual intervention as the occurrence is unique.'
				sendFailureEmail(info)
				continue

			logging.debug('Acquire File: updating record file status and path....')
			datastore.updateRecordAsStaticWithNewPath(newPath, key_id)
Example #4
0
def postprocess(dbPath):
    '''
    Post-process polling loop.

    Runs forever: every 5 seconds it (1) re-hashes decrypted files,
    (2) deletes source files for Archive Manager jobs that completed
    successfully, and (3) gathers the files of errored jobs into a
    per-batch folder and moves that folder into the error box.

    dbPath -- path to the datastore database; the function returns
    immediately if no file exists there.
    '''

    if not os.path.exists(dbPath):
        # fixed label: this is PostProcess, not PreProcess (copy-paste bug)
        logging.debug('PostProcess: can\'t find database at path')
        return

    datastore = DataStore(dbPath)

    loopcount = 0

    while True:
        sleep(5)

        # heartbeat roughly every 50 seconds (every 10th iteration)
        if loopcount % 10 == 0:
            logging.debug('PostProcess is alive')
        loopcount += 1

        #calculate checksums on decrypted files
        data = datastore.recordsForReHashing()

        processRecordsReadyToBeHashed(data, datastore)

        #delete associated files as the job was successful
        amRecords = datastore.archiveManagerJobsReadyToComplete()
        for amRecord in amRecords:
            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                recordPath = record.fileName
                if configurationOptions().shouldDeleteOriginal:
                    try:
                        os.remove(recordPath)
                    except OSError:
                        # best effort: log and continue so the job can still complete
                        info = 'PostProcess: Unable to delete the file %s' % (
                            recordPath, )
                        logging.debug(info)
            datastore.updateArchiveManagerJobAsComplete(amRecord)

        #move the associated files to the error box as the job had problems
        amRecords = datastore.archiveManagerJobsThatErrored()
        for amRecord in amRecords:
            logging.debug('performing clean up with ' + amRecord.amNumber)

            batchName = amRecord.amNumber
            destinationAMFolder = ''
            errorPath = ''

            dataStoreRecords = datastore.recordsForUUID(amRecord.uuid)
            for record in dataStoreRecords:
                pathStructureName = record.pathStructureName
                filePath = record.fileName
                currentPathStructure = configurationOptions(
                ).pathStructureWithName(pathStructureName)
                errorPath = currentPathStructure['errorBox']
                # was a bare print; routed through the logger for consistency
                logging.debug(filePath)

                # group the job's files into a folder named after the batch
                destinationAMFolder = os.path.join(os.path.dirname(filePath),
                                                   batchName)
                logging.debug('This is where the working files will go. %s' %
                              (destinationAMFolder, ))

                if not os.path.exists(destinationAMFolder):
                    try:
                        os.mkdir(destinationAMFolder)
                    except OSError:
                        # folder may already exist (race with a prior record); safe to ignore
                        pass

                # strip the "<batchName>_" prefix that was prepended at intake
                originalFileName = os.path.basename(filePath).split(
                    (batchName + "_"))[1]
                proposedAMPath = os.path.join(destinationAMFolder,
                                              originalFileName)

                try:
                    logging.debug('%s %s' % (filePath, proposedAMPath))
                    shutil.move(filePath, proposedAMPath)
                except Exception:
                    info = 'There was an error moving a file at %s for Archive Manager job %s. This will need to be manually addressed.' % (
                        filePath, batchName)
                    sendFailureEmail(info)
                    continue

                currentFiles = os.listdir(destinationAMFolder)
                filesInJob = amRecord.allFilesInRecord()

                # only move the batch folder once every expected file is present
                areAllFilesInPlace = True
                for nFile in filesInJob:
                    if nFile not in currentFiles:
                        areAllFilesInPlace = False
                if areAllFilesInPlace:
                    logging.debug('moving files to the error path')
                    try:
                        pathAfterSafelyMovingFolderToDestinationFolder(
                            destinationAMFolder, errorPath)
                    except Exception:
                        info = 'PostProcess: Unable to move the file %s' % (
                            filePath, )
                        logging.debug(info)
                        info = 'There was an error moving the folder %s into the outbox at %s' % (
                            destinationAMFolder, errorPath)
                        info = info + '\n' + 'This will need to be addressed manually'
                        sendFailureEmail(info)
                        continue

            datastore.updateArchiveManagerJobAsComplete(amRecord)