def __init__(self, storageDirFd, testModeFlag, sourceUrl):
    """Set up the sink handle: open the element directory and create
    an exclusively opened temporary data file inside it.
    @param storageDirFd descriptor of the storage base directory.
    It is duplicated or used as base for opening the element directory,
    the descriptor itself is not stored.
    @param testModeFlag stored for later use by this handle.
    @param sourceUrl the URL of the source, checked for conformity
    before use.
    @throws OSError when the temporary file cannot be created for
    any reason other than a name collision."""
    directoryFlags = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY

    self.testModeFlag = testModeFlag
    self.sourceUrl = sourceUrl
    guerillabackup.assertSourceUrlSpecificationConforming(sourceUrl)
    self.elementIdParts = DefaultFileSystemSink.internalGetElementIdParts(
        sourceUrl, None)

    self.storageDirFd = None
    if self.elementIdParts[0] != '':
        # The element lives in a subdirectory, open it relative to
        # the storage directory using the path without leading slash.
        self.storageDirFd = guerillabackup.secureOpenAt(
            storageDirFd, self.elementIdParts[0][1:],
            symlinksAllowedFlag=False,
            dirOpenFlags=directoryFlags,
            dirCreateMode=0o700,
            fileOpenFlags=directoryFlags | os.O_CREAT | os.O_EXCL,
            fileCreateMode=0o700)
    else:
        # Element is stored directly in the storage directory.
        self.storageDirFd = os.dup(storageDirFd)

    # Try random temporary names in the same directory until one
    # of them can be created exclusively.
    while True:
        self.tmpFileName = 'tmp-%s-%d' % (
            self.elementIdParts[1], random.randint(0, 1 << 30))
        try:
            self.streamFd = guerillabackup.secureOpenAt(
                self.storageDirFd, self.tmpFileName,
                symlinksAllowedFlag=False,
                dirOpenFlags=directoryFlags,
                dirCreateMode=None,
                fileOpenFlags=(
                    os.O_RDWR | os.O_NOFOLLOW | os.O_CREAT | os.O_EXCL |
                    os.O_NOCTTY),
                fileCreateMode=0o600)
        except OSError as openError:
            if openError.errno != errno.EEXIST:
                # Unexpected failure: release the directory before
                # passing on the error.
                os.close(self.storageDirFd)
                raise
            # Name already taken, try the next random name.
            continue
        break
def updateStateData(self, persistencyDirFd):
    """Replace the current state data file with one containing the
    current unit internal state. The previous "state.current" file
    is kept as "state.old".
    @param persistencyDirFd descriptor of the directory holding the
    state files.
    @throws Exception if writing fails for any reason. The state
    is printed to stderr before the exception is passed on. The
    unit will be in an uncorrectable state afterwards."""
    # Create the data structures for writing.
    stateData = {
        sourceUrl: description.getJsonData()
        for sourceUrl, description in self.backupUnitDescriptions.items()}
    writeData = bytes(json.dumps(stateData), 'ascii')

    # Try to replace the current state file. At first unlink the old
    # one.
    try:
        os.unlink('state.old', dir_fd=persistencyDirFd)
    except OSError as unlinkError:
        if unlinkError.errno != errno.ENOENT:
            raise
    # Link the current to the old one.
    try:
        os.link(
            'state.current', 'state.old', src_dir_fd=persistencyDirFd,
            dst_dir_fd=persistencyDirFd, follow_symlinks=False)
    except OSError as relinkError:
        if relinkError.errno != errno.ENOENT:
            raise
    # Unlink the current state. Thus we can then use O_EXCL on create.
    try:
        os.unlink('state.current', dir_fd=persistencyDirFd)
    except OSError as relinkError:
        if relinkError.errno != errno.ENOENT:
            raise

    # Create the new file.
    fileHandle = None
    try:
        fileHandle = guerillabackup.secureOpenAt(
            persistencyDirFd, 'state.current',
            fileOpenFlags=(
                os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW |
                os.O_NOCTTY),
            fileCreateMode=0o600)
        # os.write may write less than requested, so continue until
        # all data was handed over.
        pendingData = memoryview(writeData)
        while pendingData:
            writtenLength = os.write(fileHandle, pendingData)
            pendingData = pendingData[writtenLength:]
        # Also close handle within try, except block to catch also delayed
        # errors after write. Forget the descriptor before closing: when
        # close fails, the descriptor must not be closed a second time
        # in the finally block.
        closingHandle, fileHandle = fileHandle, None
        os.close(closingHandle)
    except Exception as stateSaveException:
        # Writing of state information failed. Print out the state information
        # for manual reconstruction as last resort.
        print(
            'Writing of state information failed: %s\nCurrent state: '
            '%s' % (str(stateSaveException), repr(writeData)),
            file=sys.stderr)
        traceback.print_tb(sys.exc_info()[2])
        raise
    finally:
        if fileHandle is not None:
            os.close(fileHandle)
def openElementFile(self, name, fileOpenFlags=None):
    """Open one of the files belonging to this element. The file
    is looked up relative to the storage directory as
    ".<elementId>.<name>".
    @param name the suffix identifying the file of the element.
    @param fileOpenFlags the flags for opening the file. When None,
    the file is opened read-only without creating it.
    @return the file descriptor of the opened file."""
    effectiveFlags = (
        os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY
        if fileOpenFlags is None else fileOpenFlags)
    relativeFileName = ''.join(('.', self.elementId, '.', name))
    return guerillabackup.secureOpenAt(
        self.storageDirFd, relativeFileName,
        symlinksAllowedFlag=False,
        dirOpenFlags=os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY,
        dirCreateMode=None,
        fileOpenFlags=effectiveFlags)
def delete(self):
    """Remove this data element by unlinking every file in the
    element directory whose name starts with the last path component
    of the element ID. The resource should be locked by the process
    attempting removal if concurrent access is possible."""
    separatorPos = self.elementId.rfind('/')
    directoryFlags = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY
    elementDirFd = guerillabackup.secureOpenAt(
        self.storageDirFd, '.' + self.elementId[:separatorPos],
        symlinksAllowedFlag=False,
        dirOpenFlags=directoryFlags,
        dirCreateMode=None,
        fileOpenFlags=directoryFlags)
    try:
        # Everything behind the last slash is the common name prefix
        # of the files to remove.
        namePrefix = self.elementId[separatorPos + 1:]
        for entryName in guerillabackup.listDirAt(elementDirFd):
            if not entryName.startswith(namePrefix):
                continue
            os.unlink(entryName, dir_fd=elementDirFd)
    finally:
        os.close(elementDirFd)
def openStorageDir(self, storageDirName, configContext):
    """Open the directory used as storage behind the sink and read
    the test mode setting from the configuration.
    @param storageDirName the name of the storage directory to open.
    @param configContext configuration providing the optional test
    mode flag.
    @throws Exception when a storage directory name was already
    set by an earlier call or when the test mode configuration
    value is not boolean."""
    if self.storageDirName is not None:
        raise Exception('Already defined')

    directoryFlags = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY
    self.storageDirName = storageDirName
    self.storageDirFd = guerillabackup.secureOpenAt(
        -1, self.storageDirName,
        symlinksAllowedFlag=False,
        dirOpenFlags=directoryFlags,
        dirCreateMode=None,
        fileOpenFlags=directoryFlags,
        fileCreateMode=0o700)

    testModeKey = guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY
    self.testModeFlag = configContext.get(testModeKey, False)
    if isinstance(self.testModeFlag, bool):
        return
    raise Exception(
        'Configuration parameter %s has to be boolean' %
        guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)
def getBackupDataElementForMetaData(self, sourceUrl, metaData):
    """Look up the stored backup data element matching the given
    source and meta data. Candidate data files are selected by name,
    the final decision is made by comparing the "DataUuid" value
    from the stored meta information with the one from metaData.
    @param sourceUrl the URL identifying the source that produced
    the stored data elements.
    @param metaData metaData dictionary for the element of interest.
    @throws Exception when the directory or a meta information file
    cannot be accessed or when the serial part of a matching file
    name is not an integer.
    @return the element or None if no matching element was found."""
    guerillabackup.assertSourceUrlSpecificationConforming(sourceUrl)
    idParts = guerillabackup.DefaultFileSystemSink.internalGetElementIdParts(
        sourceUrl, metaData)
    directoryFlags = os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY

    # Get a descriptor for the directory where the element files
    # have to reside.
    candidateDirFd = None
    if len(idParts[0]) != 0:
        try:
            candidateDirFd = guerillabackup.secureOpenAt(
                self.storageDirFd, idParts[0][1:],
                symlinksAllowedFlag=False,
                dirOpenFlags=directoryFlags,
                dirCreateMode=0o700,
                fileOpenFlags=directoryFlags | os.O_CREAT | os.O_EXCL)
        except OSError as dirOpenError:
            if dirOpenError.errno != errno.ENOENT:
                raise
            # Without directory there cannot be any valid element.
            return None
    else:
        candidateDirFd = os.dup(self.storageDirFd)

    namePrefix = idParts[2]
    nameSuffix = '-%s-%s.data' % (idParts[1], idParts[3])
    try:
        # Multiple files may share time stamp and type, so the meta
        # data of each candidate has to be loaded and compared.
        for candidateName in guerillabackup.listDirAt(candidateDirFd):
            if not (candidateName.startswith(namePrefix) and
                    candidateName.endswith(nameSuffix)):
                continue

            # The part between prefix and suffix is the optional serial.
            # A non-integer value would indicate storage corruption, the
            # resulting conversion error is passed on intentionally.
            serialPart = candidateName[len(namePrefix):-len(nameSuffix)]
            if serialPart != '':
                int(serialPart)

            # Name without the ".data" suffix.
            baseName = candidateName[:-5]
            infoFd = -1
            candidateMetaInfo = None
            try:
                infoFd = guerillabackup.secureOpenAt(
                    candidateDirFd, './%s.info' % baseName,
                    symlinksAllowedFlag=False,
                    dirOpenFlags=directoryFlags,
                    dirCreateMode=None,
                    fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY)
                candidateMetaInfo = BackupElementMetainfo.unserialize(
                    guerillabackup.readFully(infoFd))
            finally:
                if infoFd >= 0:
                    os.close(infoFd)

            if candidateMetaInfo.get('DataUuid') == metaData.get('DataUuid'):
                return FileStorageBackupDataElement(
                    self.storageDirFd, '%s/%s' % (idParts[0], baseName))
    finally:
        os.close(candidateDirFd)
    return None
def close(self, metaInfo):
    """Close the backup data element at the sink and receive any
    pending or current error associated with the writing process.
    The temporary data file is linked to its final name, then the
    meta information file is written next to it. When metaInfo
    contains a "StorageFileChecksumSha512" value, the stored data
    is read again and compared with that checksum before linking.
    Both descriptors of this handle are closed in any case.
    @param metaInfo python objects with additional information about
    this backup data element. This information is added at the end
    of the sink procedure to allow inclusion of checksum or signature
    fields created on the fly while writing. See design and
    implementation documentation for requirements on those objects.
    @throws Exception when the handle was already closed, when the
    checksum does not match, when any file operation fails or always
    after cleanup when operating in test mode."""
    if self.streamFd is None:
        raise Exception('Illegal state, already closed')
    self.elementIdParts = DefaultFileSystemSink.internalGetElementIdParts(
        self.sourceUrl, metaInfo)

    # The file name main part between timestamp (with serial) and
    # suffix as string.
    fileNameMainStr = '%s-%s' % (
        self.elementIdParts[1], self.elementIdParts[3])
    fileChecksum = metaInfo.get('StorageFileChecksumSha512')
    metaInfoStr = metaInfo.serialize()

    try:
        if fileChecksum is not None:
            # Reread the file from the start and create checksum.
            os.lseek(self.streamFd, 0, os.SEEK_SET)
            digestAlgo = hashlib.sha512()
            while True:
                data = os.read(self.streamFd, 1 << 20)
                if len(data) == 0:
                    break
                digestAlgo.update(data)
            if fileChecksum != digestAlgo.digest():
                raise Exception('Checksum mismatch')

        # Link the name to the final pathname. The first attempt is
        # made without serial, after each name collision the next serial
        # starting with 0 is tried.
        serial = -1
        storageFileName = None
        while True:
            if serial < 0:
                storageFileName = '%s-%s.data' % (
                    self.elementIdParts[2], fileNameMainStr)
            else:
                storageFileName = '%s%d-%s.data' % (
                    self.elementIdParts[2], serial, fileNameMainStr)
            serial += 1
            try:
                os.link(
                    self.tmpFileName, storageFileName,
                    src_dir_fd=self.storageDirFd,
                    dst_dir_fd=self.storageDirFd, follow_symlinks=False)
                break
            except OSError as linkError:
                if linkError.errno != errno.EEXIST:
                    raise

        # Now unlink the old file. With malicious actors we cannot be
        # sure to unlink the file we have currently opened, but in worst
        # case some malicious symlink is removed.
        os.unlink(self.tmpFileName, dir_fd=self.storageDirFd)

        # Now create the meta-information file. As the data file acted
        # as a lock, there is nothing to fail except for severe system
        # failure or malicious activity. So do not attempt to correct
        # any errors at this stage. Create a temporary version first and
        # then link it to have atomic completion operation instead of
        # risk, that another system could pick up the incomplete info
        # file.
        metaInfoFileName = storageFileName[:-4] + 'info'
        metaInfoFd = guerillabackup.secureOpenAt(
            self.storageDirFd, metaInfoFileName + '.tmp',
            symlinksAllowedFlag=False,
            dirOpenFlags=(
                os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW | os.O_NOCTTY),
            dirCreateMode=None,
            fileOpenFlags=(
                os.O_RDWR | os.O_NOFOLLOW | os.O_CREAT | os.O_EXCL |
                os.O_NOCTTY),
            fileCreateMode=0o600)
        try:
            os.write(metaInfoFd, metaInfoStr)
        finally:
            # Do not leak the descriptor when writing fails.
            os.close(metaInfoFd)

        if self.testModeFlag:
            # Unlink all artefacts again when operating in test mode.
            os.unlink(storageFileName, dir_fd=self.storageDirFd)
            os.unlink(metaInfoFileName + '.tmp', dir_fd=self.storageDirFd)
            raise Exception('No storage in test mode')

        os.link(
            metaInfoFileName + '.tmp', metaInfoFileName,
            src_dir_fd=self.storageDirFd, dst_dir_fd=self.storageDirFd,
            follow_symlinks=False)
        os.unlink(metaInfoFileName + '.tmp', dir_fd=self.storageDirFd)
    finally:
        # Release both descriptors, even when closing the first one
        # fails, and mark the handle as closed.
        try:
            os.close(self.storageDirFd)
        finally:
            self.storageDirFd = None
            streamFd, self.streamFd = self.streamFd, None
            os.close(streamFd)
def processInput(self, tarUnitDescription, sink, persistencyDirFd):
    """Process a single input description by creating the tar stream
    and updating the indices, if any. When successful, persistency
    information about this subunit is updated also.
    @param tarUnitDescription the description of the tar backup to
    run, also receiving the updated UUID and timing values.
    @param sink the sink to get the handle for writing the backup
    data from.
    @param persistencyDirFd descriptor of the directory holding the
    index and state files.
    @throws Exception when a pre or post backup command fails, when
    the pipeline or any file operation fails or always after closing
    the sink handle when operating in test mode."""
    # Keep time of invocation check and start of backup procedure
    # also for updating the unit data.
    currentTime = int(time.time())
    (_, backupType) = tarUnitDescription.getNextInvocationInfo(currentTime)

    indexFilenamePrefix = None
    indexFilePathname = None
    nextIndexFileName = None
    if tarUnitDescription.incBackupTiming is not None:
        # We will have to create an index, open the index directory at
        # first.
        indexFilenamePrefix = tarUnitDescription.sourceUrl[1:].replace(
            '/', '-')
        # Make sure the filename cannot get longer than 256 bytes, even
        # with ".index(.bz2).yyyymmddhhmmss" (25 chars) appended.
        if len(indexFilenamePrefix) > 231:
            indexFilenamePrefix = indexFilenamePrefix[:231]

        # Create the new index file.
        nextIndexFileName = '%s.index.next' % indexFilenamePrefix
        nextIndexFileHandle = guerillabackup.secureOpenAt(
            persistencyDirFd, nextIndexFileName,
            fileOpenFlags=(
                os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW |
                os.O_NOCTTY),
            fileCreateMode=0o600)
        try:
            indexFilePathname = os.path.join(
                guerillabackup.getPersistencyBaseDirPathname(
                    self.configContext),
                'generators', self.unitName, nextIndexFileName)

            if backupType == 'inc':
                # See if there is an old index. When missing, change the mode
                # to "full".
                indexStatResult = None
                try:
                    indexStatResult = os.stat(
                        '%s.index' % indexFilenamePrefix,
                        dir_fd=persistencyDirFd, follow_symlinks=False)
                except OSError as statError:
                    if statError.errno != errno.ENOENT:
                        raise
                if indexStatResult is None:
                    backupType = 'full'
                else:
                    # Copy content from current index to new one.
                    currentIndexFileHandle = guerillabackup.secureOpenAt(
                        persistencyDirFd, '%s.index' % indexFilenamePrefix,
                        fileOpenFlags=(
                            os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY))
                    try:
                        while True:
                            data = os.read(currentIndexFileHandle, 1 << 20)
                            if len(data) == 0:
                                break
                            os.write(nextIndexFileHandle, data)
                    finally:
                        os.close(currentIndexFileHandle)
        finally:
            # Close the new index also on errors to avoid leaking the
            # descriptor.
            os.close(nextIndexFileHandle)

    # Everything is prepared for backup, start it.
    if tarUnitDescription.preBackupCommandList is not None:
        if self.testModeFlag:
            print(
                'No invocation of PreBackupCommand in test mode',
                file=sys.stderr)
        else:
            process = subprocess.Popen(tarUnitDescription.preBackupCommandList)
            returnCode = process.wait()
            if returnCode != 0:
                raise Exception(
                    'Pre backup command %s failed in %s, source %s' % (
                        repr(tarUnitDescription.preBackupCommandList)[1:-1],
                        self.unitName, tarUnitDescription.sourceUrl))

    # Start the unit itself.
    backupCommand = tarUnitDescription.getBackupCommand(
        backupType, indexFilePathname)
    # Accept creation of tar archives only with zero exit status or
    # return code 1, when files were concurrently modified and those
    # races should be ignored.
    allowedExitStatusList = [0]
    if tarUnitDescription.ignoreBackupRacesFlag:
        allowedExitStatusList.append(1)
    completePipleline = [guerillabackup.OSProcessPipelineElement(
        '/bin/tar', backupCommand, allowedExitStatusList)]
    # Get the downstream transformation pipeline elements.
    completePipleline += guerillabackup.getDefaultDownstreamPipeline(
        self.configContext, tarUnitDescription.encryptionKeyName)

    # Build the transformation pipeline instance.
    sinkHandle = sink.getSinkHandle(tarUnitDescription.sourceUrl)
    sinkStream = sinkHandle.getSinkStream()

    # Get the list of started pipeline instances.
    pipelineInstances = guerillabackup.instantiateTransformationPipeline(
        completePipleline, None, sinkStream, doStartFlag=True)
    try:
        guerillabackup.runTransformationPipeline(pipelineInstances)
    except BaseException:
        # Just cleanup the incomplete index file when incremental mode
        # was requested. The error itself is always passed on.
        if nextIndexFileName is not None:
            os.unlink(nextIndexFileName, dir_fd=persistencyDirFd)
        raise

    digestData = pipelineInstances[-1].getDigestData()

    metaInfoDict = {}
    metaInfoDict['BackupType'] = backupType
    if tarUnitDescription.handlingPolicyName is not None:
        metaInfoDict['HandlingPolicy'] = [
            tarUnitDescription.handlingPolicyName]
    lastUuid = tarUnitDescription.lastUuidValue
    currentUuidDigest = hashlib.sha512()
    if lastUuid is not None:
        metaInfoDict['Predecessor'] = lastUuid
        currentUuidDigest.update(lastUuid)
    # Add the compressed file digest to make UUID different for different
    # content.
    currentUuidDigest.update(digestData)
    # Also include the timestamp and source URL in the UUID calculation
    # to make UUID different for backup of identical data at (nearly)
    # same time.
    currentUuidDigest.update(bytes(
        '%d %s' % (currentTime, tarUnitDescription.sourceUrl),
        sys.getdefaultencoding()))
    currentUuid = currentUuidDigest.digest()
    metaInfoDict['DataUuid'] = currentUuid
    metaInfoDict['StorageFileChecksumSha512'] = digestData
    metaInfoDict['Timestamp'] = currentTime

    metaInfo = BackupElementMetainfo(metaInfoDict)
    sinkHandle.close(metaInfo)
    if self.testModeFlag:
        raise Exception('No completion of tar backup in test mode')

    if tarUnitDescription.postBackupCommandList is not None:
        process = subprocess.Popen(tarUnitDescription.postBackupCommandList)
        returnCode = process.wait()
        if returnCode != 0:
            # Still raise an exception and thus prohibit completion of this
            # tar backup. The PostBackupCommand itself cannot have an influence
            # on the backup created before but the failure might indicate,
            # that the corresponding PreBackupCommand was problematic. Thus
            # let the user resolve the problem manually.
            raise Exception(
                'Post backup command %s failed in %s, source %s' % (
                    repr(tarUnitDescription.postBackupCommandList)[1:-1],
                    self.unitName, tarUnitDescription.sourceUrl))

    if tarUnitDescription.incBackupTiming is not None:
        # See if there is an old index to compress and move, but only
        # if it should be really kept. Currently fstatat function is not
        # available, so use open/fstat instead.
        currentIndexFd = None
        currentIndexName = '%s.index' % indexFilenamePrefix
        try:
            currentIndexFd = guerillabackup.secureOpenAt(
                persistencyDirFd, currentIndexName,
                fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY)
        except OSError as indexOpenError:
            if indexOpenError.errno != errno.ENOENT:
                raise

        # Name of the compressed index created in this run, stays None
        # when there was no old index or it should not be kept.
        targetFileName = None
        if currentIndexFd is not None:
            if tarUnitDescription.keepOldIndicesCount == 0:
                os.close(currentIndexFd)
                os.unlink(currentIndexName, dir_fd=persistencyDirFd)
            else:
                statData = os.fstat(currentIndexFd)
                targetFileTime = int(statData.st_mtime)
                targetFileHandle = None
                # Find a free name, incrementing the timestamp on
                # collisions.
                while True:
                    date = datetime.datetime.fromtimestamp(targetFileTime)
                    dateStr = date.strftime('%Y%m%d%H%M%S')
                    targetFileName = '%s.index.bz2.%s' % (
                        indexFilenamePrefix, dateStr)
                    try:
                        targetFileHandle = guerillabackup.secureOpenAt(
                            persistencyDirFd, targetFileName,
                            fileOpenFlags=(
                                os.O_WRONLY | os.O_CREAT | os.O_EXCL |
                                os.O_NOFOLLOW | os.O_NOCTTY),
                            fileCreateMode=0o600)
                        break
                    except OSError as indexBackupOpenError:
                        if indexBackupOpenError.errno != errno.EEXIST:
                            raise
                    targetFileTime += 1
                # Now both handles are valid, use external bzip2 binary to perform
                # compression.
                process = subprocess.Popen(
                    ['/bin/bzip2', '-c9'], stdin=currentIndexFd,
                    stdout=targetFileHandle)
                returnCode = process.wait()
                if returnCode != 0:
                    raise Exception(
                        'Failed to compress the old index: %s' % returnCode)
                os.close(currentIndexFd)
                # FIXME: we should use utime with targetFileHandle as pathlike
                # object, only available in Python3.6 and later.
                os.utime(
                    '/proc/self/fd/%d' % targetFileHandle,
                    (statData.st_mtime, statData.st_mtime))
                os.close(targetFileHandle)
                os.unlink(currentIndexName, dir_fd=persistencyDirFd)

        # Now previous index was compressed or deleted, link the next
        # index to the current position.
        os.link(
            nextIndexFileName, currentIndexName, src_dir_fd=persistencyDirFd,
            dst_dir_fd=persistencyDirFd, follow_symlinks=False)
        os.unlink(nextIndexFileName, dir_fd=persistencyDirFd)

        keepCount = tarUnitDescription.keepOldIndicesCount
        if keepCount != -1:
            # So we should apply limits to the number of index backups.
            searchPrefix = '%s.index.bz2.' % indexFilenamePrefix
            searchLength = len(searchPrefix) + 14
            fileList = sorted(
                fileName
                for fileName in guerillabackup.listDirAt(persistencyDirFd)
                if (len(fileName) == searchLength and
                    fileName.startswith(searchPrefix)))

            if len(fileList) > keepCount:
                # Make sure that the new index file was sorted last. When not,
                # the current state could indicate clock/time problems on the
                # machine. Refuse to process the indices and issue a warning.
                # The check is only possible when a compressed index was
                # created in this run.
                if targetFileName is not None:
                    indexBackupPos = fileList.index(targetFileName)
                    if indexBackupPos + 1 != len(fileList):
                        raise Exception(
                            'Sorting of old backup indices inconsistent, '
                            'refusing cleanup')
                # Use an explicit end position: a slice with "-0" would
                # select nothing when no old index should be kept.
                for fileName in fileList[:len(fileList) - keepCount]:
                    os.unlink(fileName, dir_fd=persistencyDirFd)

    # Update the UUID map as last step: if any of the steps above
    # would fail, currentUuid generated in next run will be identical
    # to this. Sorting out the duplicates will be easy.
    tarUnitDescription.lastUuidValue = currentUuid
    # Update the timestamp.
    tarUnitDescription.lastAnyBackupTime = currentTime
    if backupType == 'full':
        tarUnitDescription.lastFullBackupTime = currentTime

    # Write the new persistency data before returning.
    self.updateStateData(persistencyDirFd)
def __init__(self, unitName, configContext):
    """Create the unit from the configuration and load the state
    of the configured tar backups from the "state.current" file
    in the persistency directory of this unit, if such a file exists.
    @param unitName the name of this unit, also used as name of
    the persistency directory below "generators".
    @param configContext the configuration to read the test mode
    flag and the tar backup definitions from.
    @throws Exception when the configuration is invalid, when a
    "state.previous" file is found or when the state data is not
    a dictionary."""
    self.unitName = unitName
    self.configContext = configContext
    self.testModeFlag = configContext.get(
        guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY, False)
    if not isinstance(self.testModeFlag, bool):
        raise Exception(
            'Configuration parameter %s has to be boolean' %
            guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)

    unitConfigs = configContext.get(CONFIG_LIST_KEY, None)
    if not isinstance(unitConfigs, dict):
        raise Exception(
            'Configuration parameter %s missing or of wrong type' %
            CONFIG_LIST_KEY)
    self.backupUnitDescriptions = {
        sourceUrl: TarBackupUnitDescription(sourceUrl, configDef)
        for sourceUrl, configDef in unitConfigs.items()}

    # Load the raw persistency data, if any.
    rawStateData = None
    stateDirFd = None
    stateFileFd = None
    try:
        stateDirFd = guerillabackup.openPersistencyFile(
            configContext, os.path.join('generators', self.unitName),
            (os.O_DIRECTORY | os.O_RDONLY | os.O_CREAT | os.O_EXCL |
             os.O_NOFOLLOW | os.O_NOCTTY),
            0o700)
        try:
            stateFileFd = guerillabackup.secureOpenAt(
                stateDirFd, 'state.current',
                fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY)
        except OSError as openError:
            # A missing state file is acceptable.
            if openError.errno != errno.ENOENT:
                raise

        # A state.previous file indicates, that the unit is likely to
        # be broken. Refuse to do anything while in this state.
        try:
            os.stat('state.previous', dir_fd=stateDirFd, follow_symlinks=False)
        except OSError as statError:
            if statError.errno != errno.ENOENT:
                raise
        else:
            raise Exception(
                'Persistency data inconsistencies: found stale previous state file')

        # So there is only the current state file, if any.
        if stateFileFd is not None:
            chunkList = []
            while True:
                chunk = os.read(stateFileFd, 1 << 20)
                if not chunk:
                    break
                chunkList.append(chunk)
            rawStateData = b''.join(chunkList)
            os.close(stateFileFd)
            stateFileFd = None
    finally:
        if stateFileFd is not None:
            os.close(stateFileFd)
        if stateDirFd is not None:
            os.close(stateDirFd)

    # Start mangling of data after closing all file handles.
    if rawStateData is None:
        print(
            '%s: first time activation, no persistency data found' %
            self.unitName, file=sys.stderr)
        return

    stateInfo = json.loads(str(rawStateData, 'ascii'))
    if not isinstance(stateInfo, dict):
        raise Exception('Persistency data structure mismatch')
    for url, unitState in stateInfo.items():
        description = self.backupUnitDescriptions.get(url, None)
        if description is None:
            # Ignore this state, user might have removed a single tar backup
            # configuration without deleting the UUID and timing data.
            print(
                'No tar backup configuration for %s resource state data %s' % (
                    url, repr(unitState)), file=sys.stderr)
            continue
        description.lastFullBackupTime = unitState[0]
        description.lastAnyBackupTime = unitState[1]
        # The UUID is kept internally as binary data string. Only for
        # persistency, data will be base64 encoded.
        description.lastUuidValue = base64.b64decode(unitState[2])
def invokeUnit(self, sink):
    """Invoke this unit to create backup elements and pass them on
    to the sink. Nothing is done unless getNextInvocationTime reports
    zero. After an invocation attempt the invocation time and the
    resource UUID map are written to a new "state.current" file,
    keeping the previous one as "state.old".
    @param sink the sink passed on to processInput for each usable
    input configuration.
    @throws Exception when processing of an input or writing of
    the state information fails.
    @return always None in this implementation."""
    if self.getNextInvocationTime() != 0:
        return None

    invocationAttemptedFlag = False
    try:
        # We are now ready for processing. Get the list of source directories
        # and search patterns to locate the target files.
        inputConfigList = self.configContext.get(CONFIG_INPUT_LIST_KEY, None)
        invocationAttemptedFlag = True
        if inputConfigList is None:
            print(
                'Suspected configuration error: LogfileBackupUnit enabled '
                'but %s configuration list empty' % CONFIG_INPUT_LIST_KEY,
                file=sys.stderr)
            inputConfigList = ()
        for inputConfig in inputConfigList:
            try:
                inputDescription = LogfileBackupUnitInputDescription(
                    inputConfig)
            except Exception as configReadException:
                print(
                    'LogfileBackupUnit: failed to use configuration %s: %s' % (
                        repr(inputConfig), configReadException.args[0]),
                    file=sys.stderr)
                continue
            # Configuration parsing worked, start processing the inputs.
            self.processInput(inputDescription, sink)
    finally:
        if invocationAttemptedFlag:
            try:
                # Update the timestamp.
                self.lastInvocationTime = int(time.time())
                # Write back the new state information immediately after
                # invocation to avoid data loss when program crashes
                # immediately afterwards. Keep one old version of state file:
                # drop the old one first.
                try:
                    os.unlink('state.old', dir_fd=self.persistencyDirFd)
                except OSError as rotateError:
                    if rotateError.errno != errno.ENOENT:
                        raise
                # Keep the current state under the old name.
                try:
                    os.link(
                        'state.current', 'state.old',
                        src_dir_fd=self.persistencyDirFd,
                        dst_dir_fd=self.persistencyDirFd,
                        follow_symlinks=False)
                except OSError as rotateError:
                    if rotateError.errno != errno.ENOENT:
                        raise
                # Make room for exclusive creation of the new state.
                try:
                    os.unlink('state.current', dir_fd=self.persistencyDirFd)
                except OSError as rotateError:
                    if rotateError.errno != errno.ENOENT:
                        raise
                stateFd = guerillabackup.secureOpenAt(
                    self.persistencyDirFd, 'state.current',
                    fileOpenFlags=(
                        os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW |
                        os.O_NOCTTY),
                    fileCreateMode=0o600)
                # Binary UUID values are base64 encoded for persistency.
                encodedUuidMap = {
                    url: str(base64.b64encode(uuidData), 'ascii')
                    for url, uuidData in self.resourceUuidMap.items()}
                serializedState = json.dumps(
                    [self.lastInvocationTime, encodedUuidMap])
                os.write(stateFd, serializedState.encode('ascii'))
                os.close(stateFd)
            except Exception as stateSaveException:
                # Writing of state information failed. Print out the state
                # information for manual reconstruction as last resort.
                print(
                    'Writing of state information failed: %s\nCurrent '
                    'state: %s' % (
                        str(stateSaveException),
                        repr([self.lastInvocationTime, self.resourceUuidMap])),
                    file=sys.stderr)
                traceback.print_tb(sys.exc_info()[2])
                raise
def processInput(self, unitInput, sink):
    """Process a single input description: search the input directory
    for files matching the input file regex, group them by source
    URL and write the files of each source in sorted order to the
    sink. A file successfully written is deleted and its UUID is
    stored as predecessor for the next file of the same source.
    Processing of a source stops at the first file still opened
    for writing or opened with unknown flags.
    @param unitInput the input description providing directory
    name, file regex, encryption key name and handling policy.
    @param sink the sink to get the handles for writing from.
    @throws Exception on unknown compression types, on processing
    errors or always after closing the first sink handle when operating
    in test mode."""
    inputDirectoryFd = None
    # Only root can expect to see the open files of all processes.
    if os.geteuid() == 0:
        openerInfoErrorMode = guerillabackup.OPENER_INFO_FAIL_ON_ERROR
    else:
        guerillabackup.OPENER_INFO_FAIL_ON_ERROR
        openerInfoErrorMode = guerillabackup.OPENER_INFO_IGNORE_ACCESS_ERRORS

    try:
        inputDirectoryFd = guerillabackup.secureOpenAt(
            None, unitInput.inputDirectoryName,
            fileOpenFlags=(
                os.O_DIRECTORY | os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY))

        # Collect the matching files per source URL.
        sourceDict = {}
        for entryName in guerillabackup.listDirAt(inputDirectoryFd):
            entryMatcher = unitInput.inputFileRegex.match(entryName)
            if entryMatcher is None:
                continue
            entryUrl = unitInput.getTransformedSourceName(entryMatcher)
            entrySourceInfo = sourceDict.get(entryUrl, None)
            if entrySourceInfo is None:
                entrySourceInfo = LogfileSourceInfo(entryUrl)
                sourceDict[entryUrl] = entrySourceInfo
            entrySourceInfo.addFile(entryName, entryMatcher)

        # Now we know all files to be included for each URL. Sort them
        # to fulfill Req:OrderedProcessing and start with the oldest.
        for sourceInfo in sourceDict.values():
            if not sourceInfo.serialTypesConsistentFlag:
                print(
                    'Inconsistent serial types in %s, ignoring source.' %
                    sourceInfo.sourceUrl, file=sys.stderr)
                continue

            # Get the downstream transformation pipeline elements.
            downstreamPipelineElements = \
                guerillabackup.getDefaultDownstreamPipeline(
                    self.configContext, unitInput.encryptionKeyName)
            fileList = sourceInfo.getSortedFileList()
            filePathList = [
                '%s/%s' % (unitInput.inputDirectoryName, fileEntry[0])
                for fileEntry in fileList]
            fileInfoList = guerillabackup.getFileOpenerInformation(
                filePathList, openerInfoErrorMode)

            for fileListIndex, fileEntry in enumerate(fileList):
                fileName, matcher, _serialData = fileEntry
                # Make sure, that the file is not written any more.
                logFilePathName = os.path.join(
                    unitInput.inputDirectoryName, fileName)
                isOpenForWritingFlag = False
                openerInfo = fileInfoList[fileListIndex]
                if openerInfo is not None:
                    for pid, fdInfoList in openerInfo:
                        for fdNum, fdOpenFlags in fdInfoList:
                            if fdOpenFlags == 0o100000:
                                # The only flag value accepted here.
                                continue
                            if fdOpenFlags == 0o100001:
                                print(
                                    'File %s is still written by pid %d, '
                                    'fd %d' % (logFilePathName, pid, fdNum),
                                    file=sys.stderr)
                            else:
                                print(
                                    'File %s unknown open flags 0x%x by '
                                    'pid %d, fd %d' % (
                                        logFilePathName, fdOpenFlags, pid,
                                        fdNum),
                                    file=sys.stderr)
                            isOpenForWritingFlag = True
                # Files have to be processed in correct order, so we have to stop
                # here.
                if isOpenForWritingFlag:
                    break

                completePipleline = downstreamPipelineElements
                compressionType = matcher.groupdict().get('compress', None)
                if compressionType is not None:
                    # Source file is compressed, prepend a suffix/content-specific
                    # decompression element.
                    if compressionType != 'gz':
                        raise Exception(
                            'Unkown compression type %s for file %s/%s' % (
                                compressionType, unitInput.inputDirectoryName,
                                fileName))
                    compressionElement = guerillabackup.OSProcessPipelineElement(
                        '/bin/gzip', ['/bin/gzip', '-cd'])
                    completePipleline = [compressionElement] + \
                        completePipleline[:]

                logFileFd = guerillabackup.secureOpenAt(
                    inputDirectoryFd, fileName,
                    fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY)
                logFileStatData = os.fstat(logFileFd)
                # By wrapping the logFileFd into this object, the first pipeline
                # element will close it. So we do not need to care here.
                logFileOutput = TransformationProcessOutputStream(logFileFd)

                sinkHandle = sink.getSinkHandle(sourceInfo.sourceUrl)
                sinkStream = sinkHandle.getSinkStream()

                # Get the list of started pipeline instances.
                pipelineInstances = \
                    guerillabackup.instantiateTransformationPipeline(
                        completePipleline, logFileOutput, sinkStream,
                        doStartFlag=True)
                guerillabackup.runTransformationPipeline(pipelineInstances)
                digestData = pipelineInstances[-1].getDigestData()

                metaInfoDict = {'BackupType': 'full'}
                if unitInput.handlingPolicyName is not None:
                    metaInfoDict['HandlingPolicy'] = [
                        unitInput.handlingPolicyName]
                lastUuid = self.resourceUuidMap.get(sourceInfo.sourceUrl, None)
                currentUuidDigest = hashlib.sha512()
                if lastUuid is not None:
                    metaInfoDict['Predecessor'] = lastUuid
                    currentUuidDigest.update(lastUuid)
                # Add the compressed file digest. The consequence is, that it
                # will not be completely obvious when the same file was processed
                # twice with encryption enabled and processing failed in
                # late phase. Therefore identical file content cannot be detected.
                currentUuidDigest.update(digestData)
                # Also include the timestamp and original filename of the source
                # file in the UUID calculation: Otherwise retransmissions of files
                # with identical content cannot be distinguished.
                uuidSuffix = '%d %s' % (logFileStatData.st_mtime, fileName)
                currentUuidDigest.update(
                    bytes(uuidSuffix, sys.getdefaultencoding()))
                currentUuid = currentUuidDigest.digest()
                metaInfoDict['DataUuid'] = currentUuid
                metaInfoDict['StorageFileChecksumSha512'] = digestData
                metaInfoDict['Timestamp'] = int(logFileStatData.st_mtime)

                sinkHandle.close(BackupElementMetainfo(metaInfoDict))
                if self.testModeFlag:
                    raise Exception(
                        'No completion of logfile backup in test mode')
                # Delete the logfile.
                os.unlink(fileName, dir_fd=inputDirectoryFd)

                # Update the UUID map as last step: if any of the steps above
                # would fail, currentUuid generated in next run will be identical
                # to this. Sorting out the duplicates will be easy.
                self.resourceUuidMap[sourceInfo.sourceUrl] = currentUuid
    finally:
        if inputDirectoryFd is not None:
            os.close(inputDirectoryFd)
def __init__(self, unitName, configContext):
    """Create the unit with its default timing values, open the
    persistency directory of the unit and load the last invocation
    time and the resource UUID map from "state.current", if such
    a file exists. The persistency directory descriptor is kept
    in self.persistencyDirFd.
    @param unitName the name of this unit, also used as name of
    the persistency directory below "generators".
    @param configContext the configuration to read the test mode
    flag from.
    @throws Exception when the test mode value is not boolean, when
    a "state.previous" file is found or when the state data does
    not have the expected structure."""
    self.unitName = unitName
    self.configContext = configContext
    # This is the maximum interval in seconds between two invocations.
    # When last invocation was more than that number of seconds in
    # the past, the unit will attempt invocation at first possible
    # moment.
    self.maxInvocationInterval = 3600
    # When this value is not zero, the unit will attempt to trigger
    # invocation always at the same time using this value as modulus.
    self.moduloInvocationUnit = 3600
    # This is the invocation offset when modulus timing is enabled.
    self.moduloInvocationTime = 0
    # As immediate invocation cannot be guaranteed, this value defines
    # the size of the window, within that the unit should still be
    # invoked, even when the targeted time slot has already passed
    # by.
    self.moduloInvocationTimeWindow = 10

    self.testModeFlag = configContext.get(
        guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY, False)
    if not isinstance(self.testModeFlag, bool):
        raise Exception(
            'Configuration parameter %s has to be boolean' %
            guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)

    # Timestamp of last invocation end.
    self.lastInvocationTime = -1
    # Map from resource name to UUID of most recent file processed.
    # The UUID is kept internally as binary data string. Only for
    # persistency, data will be base64 encoded.
    self.resourceUuidMap = {}

    self.persistencyDirFd = guerillabackup.openPersistencyFile(
        configContext, os.path.join('generators', self.unitName),
        (os.O_DIRECTORY | os.O_RDONLY | os.O_CREAT | os.O_EXCL |
         os.O_NOFOLLOW | os.O_NOCTTY),
        0o700)

    stateFd = None
    try:
        stateFd = guerillabackup.secureOpenAt(
            self.persistencyDirFd, 'state.current',
            fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW | os.O_NOCTTY)
    except OSError as openError:
        # A missing state file is acceptable.
        if openError.errno != errno.ENOENT:
            raise

    # A state.previous file indicates, that the unit is likely to
    # be broken. Refuse to do anything while in this state.
    try:
        os.stat(
            'state.previous', dir_fd=self.persistencyDirFd,
            follow_symlinks=False)
    except OSError as statError:
        if statError.errno != errno.ENOENT:
            raise
    else:
        raise Exception(
            'Persistency data inconsistencies: found stale previous state file')

    # So there is only the current state file, if any.
    if stateFd is None:
        return

    chunkList = []
    while True:
        chunk = os.read(stateFd, 1 << 20)
        if not chunk:
            break
        chunkList.append(chunk)
    os.close(stateFd)

    stateInfo = json.loads(str(b''.join(chunkList), 'ascii'))
    if not (isinstance(stateInfo, list) and len(stateInfo) == 2 and
            isinstance(stateInfo[0], int) and isinstance(stateInfo[1], dict)):
        raise Exception('Persistency data structure mismatch')
    self.lastInvocationTime = stateInfo[0]
    # Replace the base64 encoded values by the binary UUIDs in place.
    self.resourceUuidMap = stateInfo[1]
    for url in list(self.resourceUuidMap):
        self.resourceUuidMap[url] = base64.b64decode(self.resourceUuidMap[url])