Example #1
    def __init__(self, storageDirFd, testModeFlag, sourceUrl):
        """Create a temporary storage file and a handle to it."""
        self.testModeFlag = testModeFlag
        self.sourceUrl = sourceUrl
        guerillabackup.assertSourceUrlSpecificationConforming(sourceUrl)
        self.elementIdParts = DefaultFileSystemSink.internalGetElementIdParts(
            sourceUrl, None)

        self.storageDirFd = None
        if self.elementIdParts[0] == '':
            self.storageDirFd = os.dup(storageDirFd)
        else:
            self.storageDirFd = guerillabackup.secureOpenAt(
                storageDirFd,
                self.elementIdParts[0][1:],
                symlinksAllowedFlag=False,
                dirOpenFlags=os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW
                | os.O_NOCTTY,
                dirCreateMode=0o700,
                fileOpenFlags=os.O_DIRECTORY | os.O_RDONLY | os.O_NOFOLLOW
                | os.O_CREAT | os.O_EXCL | os.O_NOCTTY,
                fileCreateMode=0o700)

        # Generate a temporary file name in the same directory.
        while True:
            self.tmpFileName = 'tmp-%s-%d' % (self.elementIdParts[1],
                                              random.randint(0, 1 << 30))
            try:
                self.streamFd = guerillabackup.secureOpenAt(
                    self.storageDirFd,
                    self.tmpFileName,
                    symlinksAllowedFlag=False,
                    dirOpenFlags=os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW
                    | os.O_NOCTTY,
                    dirCreateMode=None,
                    fileOpenFlags=os.O_RDWR | os.O_NOFOLLOW | os.O_CREAT
                    | os.O_EXCL | os.O_NOCTTY,
                    fileCreateMode=0o600)
                break
            except OSError as openError:
                if openError.errno != errno.EEXIST:
                    os.close(self.storageDirFd)
                    raise
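
The loop above can be reduced to plain os.open with dir_fd; the following sketch (the helper name is ours, not part of guerillabackup) shows the same race-free retry-on-EEXIST pattern without the secureOpenAt wrapper:

import errno
import os
import random

def createTempInDir(dirFd, prefix='tmp'):
    """Create a uniquely named temporary file below the directory
    referenced by dirFd; O_CREAT|O_EXCL makes the creation atomic."""
    while True:
        name = '%s-%d' % (prefix, random.randint(0, 1 << 30))
        try:
            fd = os.open(
                name,
                os.O_RDWR | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW | os.O_NOCTTY,
                0o600, dir_fd=dirFd)
            return (name, fd)
        except OSError as openError:
            if openError.errno != errno.EEXIST:
                raise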
Example #2
  def updateStateData(self, persistencyDirFd):
    """Replace the current state data file with one containing
    the current unit internal state.
    @throw Exception is writing fails for any reason. The unit
    will be in incorrectable state afterwards."""

# Create the data structures for writing.
    stateData = {}
    for sourceUrl, description in self.backupUnitDescriptions.items():
      stateData[sourceUrl] = description.getJsonData()
    writeData = bytes(json.dumps(stateData), 'ascii')

# Try to replace the current state file. First unlink the old
# one.
    try:
      os.unlink('state.old', dir_fd=persistencyDirFd)
    except OSError as unlinkError:
      if unlinkError.errno != errno.ENOENT:
        raise
# Link the current to the old one.
    try:
      os.link(
          'state.current', 'state.old', src_dir_fd=persistencyDirFd,
          dst_dir_fd=persistencyDirFd, follow_symlinks=False)
    except OSError as relinkError:
      if relinkError.errno != errno.ENOENT:
        raise
# Unlink the current state. Thus we can then use O_EXCL on create.
    try:
      os.unlink('state.current', dir_fd=persistencyDirFd)
    except OSError as relinkError:
      if relinkError.errno != errno.ENOENT:
        raise
# Create the new file.
    fileHandle = None
    try:
      fileHandle = guerillabackup.secureOpenAt(
          persistencyDirFd, 'state.current',
          fileOpenFlags=os.O_WRONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW|os.O_NOCTTY,
          fileCreateMode=0o600)
      os.write(fileHandle, writeData)
# Close the handle within the try/except block as well, to also
# catch delayed errors surfacing after the write.
      os.close(fileHandle)
      fileHandle = None
    except Exception as stateSaveException:
# Writing of state information failed. Print out the state information
# for manual reconstruction as last resort.
      print('Writing of state information failed: %s\nCurrent state: ' \
          '%s' % (str(stateSaveException), repr(writeData)), file=sys.stderr)
      traceback.print_tb(sys.exc_info()[2])
      raise
    finally:
      if fileHandle != None:
        os.close(fileHandle)
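
A minimal stdlib-only sketch of the same rotate-then-write pattern (the helper name and the use of plain os.open instead of secureOpenAt are ours):

import errno
import json
import os

def rotateAndWriteState(persistencyDirFd, stateObject):
  """Keep one old state file generation and write the new state
  to a file created atomically with O_EXCL."""
  try:
    os.unlink('state.old', dir_fd=persistencyDirFd)
  except OSError as unlinkError:
    if unlinkError.errno != errno.ENOENT:
      raise
  try:
    os.link(
        'state.current', 'state.old', src_dir_fd=persistencyDirFd,
        dst_dir_fd=persistencyDirFd, follow_symlinks=False)
    os.unlink('state.current', dir_fd=persistencyDirFd)
  except OSError as relinkError:
    if relinkError.errno != errno.ENOENT:
      raise
  fileHandle = os.open(
      'state.current', os.O_WRONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW,
      0o600, dir_fd=persistencyDirFd)
  try:
    os.write(fileHandle, bytes(json.dumps(stateObject), 'ascii'))
  finally:
    os.close(fileHandle)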
Example #3
  def openElementFile(self, name, fileOpenFlags=None):
    """Open the element file with the given name.
    @param fileOpenFlags when None, open the file read-only
    without creating it.
    @return the file descriptor of the opened file."""
    if fileOpenFlags is None:
      fileOpenFlags = os.O_RDONLY|os.O_NOFOLLOW|os.O_NOCTTY
    valueFileName = '.'+self.elementId+'.'+name
    elementFd = guerillabackup.secureOpenAt(
        self.storageDirFd, valueFileName, symlinksAllowedFlag=False,
        dirOpenFlags=os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY,
        dirCreateMode=None,
        fileOpenFlags=fileOpenFlags)
    return elementFd
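
A possible call site (the element object and file name are assumptions for illustration): read one element file fully while making sure the descriptor is closed:

import os

def readElementFileFully(element, name):
  """Read the complete content of the named element file."""
  elementFd = element.openElementFile(name)
  try:
    data = b''
    while True:
      block = os.read(elementFd, 1 << 16)
      if len(block) == 0:
        break
      data += block
    return data
  finally:
    os.close(elementFd)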
Example #4
  def delete(self):
    """Delete this data element. This will remove all files for
    this element. The resource should be locked by the process
    attempting removal if concurrent access is possible."""
    lastFileSepPos = self.elementId.rfind('/')
    dirFd = guerillabackup.secureOpenAt(
        self.storageDirFd, '.'+self.elementId[:lastFileSepPos],
        symlinksAllowedFlag=False,
        dirOpenFlags=os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY,
        dirCreateMode=None,
        fileOpenFlags=os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY)
    try:
      fileNamePrefix = self.elementId[lastFileSepPos+1:]
      for fileName in guerillabackup.listDirAt(dirFd):
        if fileName.startswith(fileNamePrefix):
          os.unlink(fileName, dir_fd=dirFd)
    finally:
      os.close(dirFd)
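
The removal itself boils down to prefix-based unlinking relative to a directory descriptor. A stdlib-only sketch (os.listdir accepts a file descriptor, which is presumably what the guerillabackup.listDirAt call above wraps):

import os

def unlinkByPrefix(dirFd, fileNamePrefix):
  """Remove every entry in the directory referenced by dirFd
  whose name starts with fileNamePrefix."""
  for fileName in os.listdir(dirFd):
    if fileName.startswith(fileNamePrefix):
      os.unlink(fileName, dir_fd=dirFd)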
Example #5
    def openStorageDir(self, storageDirName, configContext):
        """Open the storage behind the sink. This method may only
    be called once."""
        if self.storageDirName != None:
            raise Exception('Already defined')
        self.storageDirName = storageDirName
        self.storageDirFd = guerillabackup.secureOpenAt(
            -1,
            self.storageDirName,
            symlinksAllowedFlag=False,
            dirOpenFlags=os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW
            | os.O_NOCTTY,
            dirCreateMode=None,
            fileOpenFlags=os.O_DIRECTORY | os.O_RDONLY | os.O_NOFOLLOW
            | os.O_NOCTTY,
            fileCreateMode=0o700)

        self.testModeFlag = configContext.get(
            guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY, False)
        if not isinstance(self.testModeFlag, bool):
            raise Exception('Configuration parameter %s has to be ' \
                'boolean' % guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)
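
Hypothetical wiring for this method (the class name is taken from the other examples; the constructor call and storage path are assumptions, not verified against the full guerillabackup API):

sink = DefaultFileSystemSink()
sink.openStorageDir('/var/lib/guerillabackup/data', configContext={})
# A second openStorageDir call would raise Exception('Already defined').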
Example #6
  def getBackupDataElementForMetaData(self, sourceUrl, metaData):
    """Retrieve a single stored backup data element from the storage.
    @param sourceUrl the URL identifying the source that produced
    the stored data elements.
    @param metaData metaData dictionary for the element of interest.
    @throws Exception when an incompatible query, update or read
    is in progress.
    @return the element or None if no matching element was found."""
# First get an iterator over all elements in the file system that
# might match the given query.
    guerillabackup.assertSourceUrlSpecificationConforming(sourceUrl)
    elementIdParts = \
        guerillabackup.DefaultFileSystemSink.internalGetElementIdParts(
            sourceUrl, metaData)
# Now search the directory for all files conforming to the specification.
# As there may exist multiple files with the same timestamp and
# type, also load the meta data and check whether it matches the query.
    elementDirFd = None
    if len(elementIdParts[0]) == 0:
      elementDirFd = os.dup(self.storageDirFd)
    else:
      try:
        elementDirFd = guerillabackup.secureOpenAt(
            self.storageDirFd, elementIdParts[0][1:], symlinksAllowedFlag=False,
            dirOpenFlags=os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY,
            dirCreateMode=0o700,
            fileOpenFlags=os.O_DIRECTORY|os.O_RDONLY|os.O_NOFOLLOW|os.O_CREAT|os.O_EXCL|os.O_NOCTTY)
      except OSError as dirOpenError:
# Directory does not exist, so there cannot be any valid element.
        if dirOpenError.errno == errno.ENOENT:
          return None
        raise
    searchPrefix = elementIdParts[2]
    searchSuffix = '-%s-%s.data' % (elementIdParts[1], elementIdParts[3])
    result = None
    try:
      fileList = guerillabackup.listDirAt(elementDirFd)
      for fileName in fileList:
        if ((not fileName.startswith(searchPrefix)) or
            (not fileName.endswith(searchSuffix))):
          continue
# Just verify that the serial part is really an integer; there is
# no need to handle the exception. A failure would indicate storage
# corruption, so we need to stop anyway.
        serialStr = fileName[len(searchPrefix):-len(searchSuffix)]
        if serialStr != '':
          int(serialStr)
# So the file might match; load the meta data.
        metaDataFd = -1
        fileMetaInfo = None
        try:
          metaDataFd = guerillabackup.secureOpenAt(
              elementDirFd, './%s.info' % fileName[:-5],
              symlinksAllowedFlag=False,
              dirOpenFlags=os.O_RDONLY|os.O_DIRECTORY|os.O_NOFOLLOW|os.O_NOCTTY,
              dirCreateMode=None,
              fileOpenFlags=os.O_RDONLY|os.O_NOFOLLOW|os.O_NOCTTY)
          metaInfoData = guerillabackup.readFully(metaDataFd)
          fileMetaInfo = BackupElementMetainfo.unserialize(metaInfoData)
        finally:
          if metaDataFd >= 0:
            os.close(metaDataFd)
        if fileMetaInfo.get('DataUuid') != metaData.get('DataUuid'):
          continue
        elementId = '%s/%s' % (elementIdParts[0], fileName[:-5])
        result = FileStorageBackupDataElement(self.storageDirFd, elementId)
        break

    finally:
      os.close(elementDirFd)
    return result
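
The name filtering reduces to a prefix/suffix check plus validation of the optional serial part in between; isolated here as a sketch (the helper name is ours):

def findCandidateNames(fileList, searchPrefix, searchSuffix):
  """Return all names with the given prefix and suffix whose
  optional serial part in between is a decimal integer."""
  result = []
  for fileName in fileList:
    if ((not fileName.startswith(searchPrefix)) or
        (not fileName.endswith(searchSuffix))):
      continue
    serialStr = fileName[len(searchPrefix):-len(searchSuffix)]
    if serialStr != '':
      int(serialStr)
    result.append(fileName)
  return result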
Example #7
    def close(self, metaInfo):
        """Close the backup data element at the sink and receive any
    pending or current error associated with the writing process.
    When there is sufficient risk, that data written to the sink
    is might have been corrupted during transit or storage, the
    sink may decide to perform a verification operation while
    closing and return any verification errors here also.
    @param metaInfo python objects with additional information
    about this backup data element. This information is added
    at the end of the sink procedure to allow inclusion of checksum
    or signature fields created on the fly while writing. See
    design and implementation documentation for requirements on
    those objects."""
        if self.streamFd is None:
            raise Exception('Illegal state, already closed')
        self.elementIdParts = DefaultFileSystemSink.internalGetElementIdParts(
            self.sourceUrl, metaInfo)

        # The main part of the file name, between the timestamp (with
        # serial) and the suffix, as a string.
        fileNameMainStr = '%s-%s' % (self.elementIdParts[1],
                                     self.elementIdParts[3])
        fileChecksum = metaInfo.get('StorageFileChecksumSha512')
        metaInfoStr = metaInfo.serialize()

        try:
            if fileChecksum != None:
                # Reread the file and create checksum.
                os.lseek(self.streamFd, 0, os.SEEK_SET)
                digestAlgo = hashlib.sha512()
                while True:
                    data = os.read(self.streamFd, 1 << 20)
                    if len(data) == 0:
                        break
                    digestAlgo.update(data)
                if fileChecksum != digestAlgo.digest():
                    raise Exception('Checksum mismatch')

            # Link the name to the final pathname.
            serial = -1
            storageFileName = None
            while True:
                if serial < 0:
                    storageFileName = '%s-%s.data' % (self.elementIdParts[2],
                                                      fileNameMainStr)
                else:
                    storageFileName = '%s%d-%s.data' % (
                        self.elementIdParts[2], serial, fileNameMainStr)
                serial += 1
                try:
                    os.link(self.tmpFileName,
                            storageFileName,
                            src_dir_fd=self.storageDirFd,
                            dst_dir_fd=self.storageDirFd,
                            follow_symlinks=False)
                    break
                except OSError as linkError:
                    if linkError.errno != errno.EEXIST:
                        raise


            # Now unlink the old file. With malicious actors we cannot be
            # sure to unlink the file we have currently opened, but in the
            # worst case some malicious symlink is removed.
            os.unlink(self.tmpFileName, dir_fd=self.storageDirFd)

            # Now create the meta-information file. As the data file acted
            # as a lock, there is nothing to fail except for severe system
            # failure or malicious activity. So do not attempt to correct
            # any errors at this stage. Create a temporary version first
            # and then link it, to get an atomic completion operation
            # instead of risking that another system could pick up the
            # incomplete info file.
            metaInfoFileName = storageFileName[:-4] + 'info'
            metaInfoFd = guerillabackup.secureOpenAt(
                self.storageDirFd,
                metaInfoFileName + '.tmp',
                symlinksAllowedFlag=False,
                dirOpenFlags=os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW
                | os.O_NOCTTY,
                dirCreateMode=None,
                fileOpenFlags=os.O_RDWR | os.O_NOFOLLOW | os.O_CREAT
                | os.O_EXCL | os.O_NOCTTY,
                fileCreateMode=0o600)
            os.write(metaInfoFd, metaInfoStr)
            os.close(metaInfoFd)
            if self.testModeFlag:
                # Unlink all artefacts when operating in test mode to
                # avoid accidentally keeping test data.
                os.unlink(storageFileName, dir_fd=self.storageDirFd)
                os.unlink(metaInfoFileName + '.tmp', dir_fd=self.storageDirFd)
                raise Exception('No storage in test mode')
            os.link(metaInfoFileName + '.tmp',
                    metaInfoFileName,
                    src_dir_fd=self.storageDirFd,
                    dst_dir_fd=self.storageDirFd,
                    follow_symlinks=False)
            os.unlink(metaInfoFileName + '.tmp', dir_fd=self.storageDirFd)
        finally:
            os.close(self.storageDirFd)
            self.storageDirFd = None
            os.close(self.streamFd)
            self.streamFd = None
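
The verification step rewinds the still-open descriptor and hashes the complete content; as a standalone sketch (note the os.lseek argument order, offset before whence, as corrected above):

import hashlib
import os

def sha512OfFd(streamFd):
    """Rewind the descriptor and compute the SHA-512 digest of
    the complete file content."""
    os.lseek(streamFd, 0, os.SEEK_SET)
    digestAlgo = hashlib.sha512()
    while True:
        data = os.read(streamFd, 1 << 20)
        if len(data) == 0:
            break
        digestAlgo.update(data)
    return digestAlgo.digest()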
Example #8
  def processInput(self, tarUnitDescription, sink, persistencyDirFd):
    """Process a single input description by creating the tar
    stream and updating the indices, if any. When successful,
    persistency information about this subunit is updated also."""
# Keep the time of the invocation check and of the backup procedure
# start, also for updating the unit data afterwards.
    currentTime = int(time.time())
    (invocationTime, backupType) = tarUnitDescription.getNextInvocationInfo(
        currentTime)

    indexFilenamePrefix = None
    indexFilePathname = None
    nextIndexFileName = None
    if tarUnitDescription.incBackupTiming != None:
# We will have to create an index, so open the index directory
# first.
      indexFilenamePrefix = tarUnitDescription.sourceUrl[1:].replace('/', '-')
# Make sure the filename cannot get longer than 256 bytes, even
# with ".index(.bz2).yyyymmddhhmmss" (25 chars) appended.
      if len(indexFilenamePrefix) > 231:
        indexFilenamePrefix = indexFilenamePrefix[:231]

# Create the new index file.
      nextIndexFileName = '%s.index.next' % indexFilenamePrefix
      nextIndexFileHandle = guerillabackup.secureOpenAt(
          persistencyDirFd, nextIndexFileName,
          fileOpenFlags=os.O_WRONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW|os.O_NOCTTY,
          fileCreateMode=0o600)
      indexFilePathname = os.path.join(
          guerillabackup.getPersistencyBaseDirPathname(self.configContext),
          'generators', self.unitName, nextIndexFileName)

      if backupType == 'inc':
# See if there is an old index. When missing, change the mode
# to "full".
        indexStatResult = None
        try:
          indexStatResult = os.stat(
              '%s.index' % indexFilenamePrefix, dir_fd=persistencyDirFd,
              follow_symlinks=False)
        except OSError as statError:
          if statError.errno != errno.ENOENT:
            raise
        if indexStatResult is None:
          backupType = 'full'
        else:
# Copy content from current index to new one.
          currentIndexFileHandle = guerillabackup.secureOpenAt(
              persistencyDirFd, '%s.index' % indexFilenamePrefix,
              fileOpenFlags=os.O_RDONLY|os.O_NOFOLLOW|os.O_NOCTTY)
          while True:
            data = os.read(currentIndexFileHandle, 1<<20)
            if len(data) == 0:
              break
            os.write(nextIndexFileHandle, data)
          os.close(currentIndexFileHandle)
      os.close(nextIndexFileHandle)

# Everything is prepared for backup, start it.
    if tarUnitDescription.preBackupCommandList != None:
      if self.testModeFlag:
        print('No invocation of PreBackupCommand in test mode', file=sys.stderr)
      else:
        process = subprocess.Popen(tarUnitDescription.preBackupCommandList)
        returnCode = process.wait()
        if returnCode != 0:
          raise Exception('Pre backup command %s failed in %s, source %s' % (
              repr(tarUnitDescription.preBackupCommandList)[1:-1],
              self.unitName, tarUnitDescription.sourceUrl))

# Start the unit itself.
    backupCommand = tarUnitDescription.getBackupCommand(
        backupType, indexFilePathname)
# Accept tar archive creation with zero exit status, or also with
# return code 1 when files were concurrently modified and those
# races should be ignored.
    allowedExitStatusList = [0]
    if tarUnitDescription.ignoreBackupRacesFlag:
      allowedExitStatusList.append(1)
    completePipleline = [guerillabackup.OSProcessPipelineElement(
        '/bin/tar', backupCommand, allowedExitStatusList)]
# Get the downstream transformation pipeline elements.
    completePipleline += guerillabackup.getDefaultDownstreamPipeline(
        self.configContext, tarUnitDescription.encryptionKeyName)

# Build the transformation pipeline instance.
    sinkHandle = sink.getSinkHandle(tarUnitDescription.sourceUrl)
    sinkStream = sinkHandle.getSinkStream()

# Get the list of started pipeline instances.
    pipelineInstances = guerillabackup.instantiateTransformationPipeline(
        completePipleline, None, sinkStream, doStartFlag=True)
    try:
      guerillabackup.runTransformationPipeline(pipelineInstances)
    except:
# Just clean up the incomplete index file when incremental mode
# was requested.
      if nextIndexFileName is not None:
        os.unlink(nextIndexFileName, dir_fd=persistencyDirFd)
      raise

    digestData = pipelineInstances[-1].getDigestData()

    metaInfoDict = {}
    metaInfoDict['BackupType'] = backupType
    if tarUnitDescription.handlingPolicyName != None:
      metaInfoDict['HandlingPolicy'] = [tarUnitDescription.handlingPolicyName]
    lastUuid = tarUnitDescription.lastUuidValue
    currentUuidDigest = hashlib.sha512()
    if lastUuid != None:
      metaInfoDict['Predecessor'] = lastUuid
      currentUuidDigest.update(lastUuid)
# Add the compressed file digest to make the UUID different for
# different content.
    currentUuidDigest.update(digestData)
# Also include the timestamp and source URL in the UUID calculation
# to make the UUID different for backups of identical data at
# (nearly) the same time.
    currentUuidDigest.update(bytes('%d %s' % (
        currentTime, tarUnitDescription.sourceUrl), sys.getdefaultencoding()))
    currentUuid = currentUuidDigest.digest()
    metaInfoDict['DataUuid'] = currentUuid
    metaInfoDict['StorageFileChecksumSha512'] = digestData
    metaInfoDict['Timestamp'] = currentTime

    metaInfo = BackupElementMetainfo(metaInfoDict)
    sinkHandle.close(metaInfo)
    if self.testModeFlag:
      raise Exception('No completion of tar backup in test mode')

    if tarUnitDescription.postBackupCommandList != None:
      process = subprocess.Popen(tarUnitDescription.postBackupCommandList)
      returnCode = process.wait()
      if returnCode != 0:
# Still raise an exception and thus prohibit completion of this
# tar backup. The PostBackupCommand itself cannot influence the
# backup created before, but the failure might indicate that the
# corresponding PreBackupCommand was problematic. Thus let the
# user resolve the problem manually.
        raise Exception('Post backup command %s failed in %s, source %s' % (
            repr(tarUnitDescription.postBackupCommandList)[1:-1],
            self.unitName, tarUnitDescription.sourceUrl))

    if tarUnitDescription.incBackupTiming != None:
# See if there is an old index to compress and move, but only if
# it should really be kept. Currently the fstatat function is not
# available, so use open/fstat instead.
      currentIndexFd = None
      currentIndexName = '%s.index' % indexFilenamePrefix
      try:
        currentIndexFd = guerillabackup.secureOpenAt(
            persistencyDirFd, currentIndexName,
            fileOpenFlags=os.O_RDONLY|os.O_NOFOLLOW|os.O_NOCTTY)
      except OSError as indexOpenError:
        if indexOpenError.errno != errno.ENOENT:
          raise

      targetFileName = None
      if currentIndexFd != None:
        if tarUnitDescription.keepOldIndicesCount == 0:
          os.close(currentIndexFd)
          os.unlink(currentIndexName, dir_fd=persistencyDirFd)
        else:
          statData = os.fstat(currentIndexFd)
          targetFileTime = int(statData.st_mtime)
          targetFileHandle = None
          while True:
            date = datetime.datetime.fromtimestamp(targetFileTime)
            dateStr = date.strftime('%Y%m%d%H%M%S')
            targetFileName = '%s.index.bz2.%s' % (indexFilenamePrefix, dateStr)
            try:
              targetFileHandle = guerillabackup.secureOpenAt(
                  persistencyDirFd, targetFileName,
                  fileOpenFlags=os.O_WRONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW|os.O_NOCTTY,
                  fileCreateMode=0o600)
              break
            except OSError as indexBackupOpenError:
              if indexBackupOpenError.errno != errno.EEXIST:
                raise
            targetFileTime += 1
# Now both handles are valid; use the external bzip2 binary to
# perform the compression.
          process = subprocess.Popen(
              ['/bin/bzip2', '-c9'], stdin=currentIndexFd,
              stdout=targetFileHandle)
          returnCode = process.wait()
          if returnCode != 0:
            raise Exception('Failed to compress the old index: %s' % returnCode)
          os.close(currentIndexFd)
# FIXME: we should use utime with targetFileHandle as pathlike
# object, only available in Python3.6 and later.
          os.utime(
              '/proc/self/fd/%d' % targetFileHandle,
              (statData.st_mtime, statData.st_mtime))
          os.close(targetFileHandle)
          os.unlink(currentIndexName, dir_fd=persistencyDirFd)

# Now that the previous index was compressed or deleted, link the
# next index to the current position.
      os.link(
          nextIndexFileName, currentIndexName, src_dir_fd=persistencyDirFd,
          dst_dir_fd=persistencyDirFd, follow_symlinks=False)
      os.unlink(nextIndexFileName, dir_fd=persistencyDirFd)

      if tarUnitDescription.keepOldIndicesCount != -1:
# So we should apply limits to the number of index backups.
        fileList = []
        searchPrefix = '%s.index.bz2.' % indexFilenamePrefix
        searchLength = len(searchPrefix)+14
        for fileName in guerillabackup.listDirAt(persistencyDirFd):
          if ((len(fileName) != searchLength) or
              (not fileName.startswith(searchPrefix))):
            continue
          fileList.append(fileName)
        fileList.sort()

        if len(fileList) > tarUnitDescription.keepOldIndicesCount:
# Make sure that the new index file was sorted last. When not,
# the current state could indicate clock/time problems on the
# machine. Refuse to clean up the indices in that case and raise
# an exception.
          indexBackupPos = fileList.index(targetFileName)
          if indexBackupPos+1 != len(fileList):
            raise Exception('Sorting of old backup indices inconsistent, refusing cleanup')
          for fileName in fileList[:-tarUnitDescription.keepOldIndicesCount]:
            os.unlink(fileName, dir_fd=persistencyDirFd)

# Update the UUID map as last step: if any of the steps above
# would fail, currentUuid generated in next run will be identical
# to this. Sorting out the duplicates will be easy.
    tarUnitDescription.lastUuidValue = currentUuid
# Update the timestamp.
    tarUnitDescription.lastAnyBackupTime = currentTime
    if backupType == 'full':
      tarUnitDescription.lastFullBackupTime = currentTime

# Write the new persistency data before returning.
    self.updateStateData(persistencyDirFd)
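
The old-index archiving above allocates a unique timestamped name by bumping the timestamp on every EEXIST collision; the same idea isolated with plain os.open (the helper name is ours):

import datetime
import errno
import os

def openUniqueTimestamped(persistencyDirFd, indexFilenamePrefix, startTime):
  """Open <prefix>.index.bz2.<YYYYMMDDHHMMSS> with O_EXCL, bumping
  the timestamp by one second on each name collision."""
  targetFileTime = int(startTime)
  while True:
    dateStr = datetime.datetime.fromtimestamp(
        targetFileTime).strftime('%Y%m%d%H%M%S')
    targetFileName = '%s.index.bz2.%s' % (indexFilenamePrefix, dateStr)
    try:
      targetFileHandle = os.open(
          targetFileName,
          os.O_WRONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW|os.O_NOCTTY,
          0o600, dir_fd=persistencyDirFd)
      return (targetFileName, targetFileHandle)
    except OSError as indexBackupOpenError:
      if indexBackupOpenError.errno != errno.EEXIST:
        raise
    targetFileTime += 1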
Example #9
  def __init__(self, unitName, configContext):
    """Initialize this unit using the given configuration."""
    self.unitName = unitName
    self.configContext = configContext

    self.testModeFlag = configContext.get(guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY, False)
    if not isinstance(self.testModeFlag, bool):
      raise Exception('Configuration parameter %s has to be ' \
          'boolean' % guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)

    backupConfigList = configContext.get(CONFIG_LIST_KEY, None)
    if (backupConfigList is None) or (not isinstance(backupConfigList, dict)):
      raise Exception('Configuration parameter %s missing or of wrong type' % CONFIG_LIST_KEY)
    self.backupUnitDescriptions = {}
    for sourceUrl, configDef in backupConfigList.items():
      self.backupUnitDescriptions[sourceUrl] = TarBackupUnitDescription(
          sourceUrl, configDef)

# Start loading the persistency information.
    persistencyDirFd = None
    persistencyFileHandle = None
    stateData = None
    try:
      persistencyDirFd = guerillabackup.openPersistencyFile(
          configContext, os.path.join('generators', self.unitName),
          os.O_DIRECTORY|os.O_RDONLY|os.O_CREAT|os.O_EXCL|os.O_NOFOLLOW|os.O_NOCTTY, 0o700)

      try:
        persistencyFileHandle = guerillabackup.secureOpenAt(
            persistencyDirFd, 'state.current',
            fileOpenFlags=os.O_RDONLY|os.O_NOFOLLOW|os.O_NOCTTY)
      except OSError as openError:
        if openError.errno != errno.ENOENT:
          raise

# See if the state.previous file exists; if yes, the unit is likely
# to be broken. Refuse to do anything while in this state.
      try:
        os.stat(
            'state.previous', dir_fd=persistencyDirFd, follow_symlinks=False)
        raise Exception(
            'Persistency data inconsistencies: found stale previous state file')
      except OSError as statError:
        if statError.errno != errno.ENOENT:
          raise
# So there is only the current state file, if any.
      if persistencyFileHandle != None:
        stateData = b''
        while True:
          data = os.read(persistencyFileHandle, 1<<20)
          if len(data) == 0:
            break
          stateData += data
        os.close(persistencyFileHandle)
        persistencyFileHandle = None
    finally:
      if persistencyFileHandle != None:
        os.close(persistencyFileHandle)
      if persistencyDirFd != None:
        os.close(persistencyDirFd)

# Start mangling of data after closing all file handles.
    if stateData is None:
      print('%s: first time activation, no persistency data found' % self.unitName, file=sys.stderr)
    else:
      stateInfo = json.loads(str(stateData, 'ascii'))
      if not isinstance(stateInfo, dict):
        raise Exception('Persistency data structure mismatch')
      for url, stateData in stateInfo.items():
        description = self.backupUnitDescriptions.get(url, None)
        if description is None:
# Ignore this state, user might have removed a single tar backup
# configuration without deleting the UUID and timing data.
          print('No tar backup configuration for %s resource state data %s' % (
              url, repr(stateData)), file=sys.stderr)
          continue
        description.lastFullBackupTime = stateData[0]
        description.lastAnyBackupTime = stateData[1]
# The UUID is kept internally as binary data string. Only for
# persistency, data will be base64 encoded.
        description.lastUuidValue = base64.b64decode(stateData[2])
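
Each per-URL state entry is a JSON triple [lastFullBackupTime, lastAnyBackupTime, base64(uuid)]; a self-contained decoding sketch (the sample payload is made up):

import base64
import json

stateData = b'{"file:///etc": [1500000000, 1500086400, "q80="]}'
stateInfo = json.loads(str(stateData, 'ascii'))
for url, entry in stateInfo.items():
  lastFullBackupTime, lastAnyBackupTime, uuidBase64 = entry
# The UUID is binary internally; base64 is only used for persistency.
  lastUuidValue = base64.b64decode(uuidBase64)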
Example #10
    def invokeUnit(self, sink):
        """Invoke this unit to create backup elements and pass them
    on to the sink. Even when indicated via getNextInvocationTime,
    the unit may decide, that it is not yet ready and not write
    any element to the sink.
    @return None if currently there is nothing to write to the
    source, a number of seconds to retry invocation if the unit
    assumes, that there is data to be processed but processing
    cannot start yet, e.g. due to locks held by other parties
    or resource, e.g. network storages, currently not available."""
        nextInvocationDelta = self.getNextInvocationTime()
        invocationAttemptedFlag = False

        try:
            if nextInvocationDelta == 0:
                # We are now ready for processing. Get the list of source directories
                # and search patterns to locate the target files.
                unitInputListConfig = self.configContext.get(
                    CONFIG_INPUT_LIST_KEY, None)
                invocationAttemptedFlag = True
                nextInvocationDelta = None

                if unitInputListConfig is None:
                    print('Suspected configuration error: LogfileBackupUnit ' \
                        'enabled but %s configuration list empty' % CONFIG_INPUT_LIST_KEY,
                          file=sys.stderr)
                else:
                    for configItem in unitInputListConfig:
                        unitInput = None
                        try:
                            unitInput = LogfileBackupUnitInputDescription(
                                configItem)
                        except Exception as configReadException:
                            print('LogfileBackupUnit: failed to use configuration ' \
                                '%s: %s' % (
                                    repr(configItem), configReadException.args[0]),
                                  file=sys.stderr)
                            continue
                        # Configuration parsing worked, start processing the inputs.
                        self.processInput(unitInput, sink)
        finally:
            if invocationAttemptedFlag:
                try:
                    # Update the timestamp.
                    self.lastInvocationTime = int(time.time())
                    # Write back the new state information immediately after invocation
                    # to avoid data loss when program crashes immediately afterwards.
                    # Keep one old version of state file.
                    try:
                        os.unlink('state.old', dir_fd=self.persistencyDirFd)
                    except OSError as relinkError:
                        if relinkError.errno != errno.ENOENT:
                            raise
                    try:
                        os.link('state.current',
                                'state.old',
                                src_dir_fd=self.persistencyDirFd,
                                dst_dir_fd=self.persistencyDirFd,
                                follow_symlinks=False)
                    except OSError as relinkError:
                        if relinkError.errno != errno.ENOENT:
                            raise
                    try:
                        os.unlink('state.current',
                                  dir_fd=self.persistencyDirFd)
                    except OSError as relinkError:
                        if relinkError.errno != errno.ENOENT:
                            raise
                    handle = guerillabackup.secureOpenAt(
                        self.persistencyDirFd,
                        'state.current',
                        fileOpenFlags=os.O_WRONLY | os.O_CREAT | os.O_EXCL
                        | os.O_NOFOLLOW | os.O_NOCTTY,
                        fileCreateMode=0o600)
                    writeResourceUuidMap = {}
                    for url, uuidData in self.resourceUuidMap.items():
                        writeResourceUuidMap[url] = str(
                            base64.b64encode(uuidData), 'ascii')
                    os.write(
                        handle,
                        json.dumps(
                            [self.lastInvocationTime,
                             writeResourceUuidMap]).encode('ascii'))
                    os.close(handle)
                except Exception as stateSaveException:
                    # Writing of state information failed. Print out the state information
                    # for manual reconstruction as last resort.
                    print('Writing of state information failed: %s\nCurrent ' \
                        'state: %s' % (
                            str(stateSaveException),
                            repr([self.lastInvocationTime, self.resourceUuidMap])),
                          file=sys.stderr)
                    traceback.print_tb(sys.exc_info()[2])
                    raise
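
A hypothetical driver loop for the @return contract documented above (the unit and sink objects are assumptions, not part of the shown code):

import time

def driveUnit(unit, sink, maxRounds=10):
    """Invoke the unit until it reports nothing to do, honoring
    any requested retry delay in seconds."""
    for _ in range(maxRounds):
        retryDelta = unit.invokeUnit(sink)
        if retryDelta is None:
            return
        time.sleep(retryDelta)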
Example #11
    def processInput(self, unitInput, sink):
        """Process a single input description by searching for files
    that could be written to the sink."""
        inputDirectoryFd = None
        getFileOpenerInformationErrorMode = guerillabackup.OPENER_INFO_FAIL_ON_ERROR
        if os.geteuid() != 0:
            getFileOpenerInformationErrorMode = guerillabackup.OPENER_INFO_IGNORE_ACCESS_ERRORS
        try:
            inputDirectoryFd = guerillabackup.secureOpenAt(
                None,
                unitInput.inputDirectoryName,
                fileOpenFlags=os.O_DIRECTORY | os.O_RDONLY | os.O_NOFOLLOW
                | os.O_NOCTTY)

            sourceDict = {}
            for fileName in guerillabackup.listDirAt(inputDirectoryFd):
                matcher = unitInput.inputFileRegex.match(fileName)
                if matcher is None:
                    continue
                sourceUrl = unitInput.getTransformedSourceName(matcher)
                sourceInfo = sourceDict.get(sourceUrl, None)
                if sourceInfo is None:
                    sourceInfo = LogfileSourceInfo(sourceUrl)
                    sourceDict[sourceUrl] = sourceInfo
                sourceInfo.addFile(fileName, matcher)

            # Now we know all files to be included for each URL. Sort them
            # to fulfill Req:OrderedProcessing and start with the oldest.
            for sourceUrl, sourceInfo in sourceDict.items():
                if not sourceInfo.serialTypesConsistentFlag:
                    print('Inconsistent serial types in %s, ignoring ' \
                        'source.' % sourceInfo.sourceUrl, file=sys.stderr)
                    continue

                # Get the downstream transformation pipeline elements.
                downstreamPipelineElements = \
                    guerillabackup.getDefaultDownstreamPipeline(
                        self.configContext, unitInput.encryptionKeyName)
                fileList = sourceInfo.getSortedFileList()
                fileInfoList = guerillabackup.getFileOpenerInformation([
                    '%s/%s' % (unitInput.inputDirectoryName, x[0])
                    for x in fileList
                ], getFileOpenerInformationErrorMode)
                for fileListIndex in range(0, len(fileList)):
                    fileName, matcher, serialData = fileList[fileListIndex]
                    # Make sure that the file is no longer being written.
                    logFilePathName = os.path.join(
                        unitInput.inputDirectoryName, fileName)
                    isOpenForWritingFlag = False
                    if fileInfoList[fileListIndex] != None:
                        for pid, fdInfoList in fileInfoList[fileListIndex]:
                            for fdNum, fdOpenFlags in fdInfoList:
                                if fdOpenFlags == 0o100001:
                                    print('File %s is still written by pid %d, ' \
                                        'fd %d' % (logFilePathName, pid, fdNum), file=sys.stderr)
                                    isOpenForWritingFlag = True
                                elif fdOpenFlags != 0o100000:
                                    print('File %s unknown open flags 0x%x by pid %d, ' \
                                        'fd %d' % (
                                            logFilePathName, fdOpenFlags, pid, fdNum), file=sys.stderr)
                                    isOpenForWritingFlag = True
                    # Files have to be processed in correct order, so we
                    # have to stop here.
                    if isOpenForWritingFlag:
                        break
                    completePipleline = downstreamPipelineElements
                    compressionType = matcher.groupdict().get('compress', None)
                    if compressionType != None:
                        # Source file is compressed, prepend a suffix/content-specific
                        # decompression element.
                        compressionElement = None
                        if compressionType == 'gz':
                            compressionElement = guerillabackup.OSProcessPipelineElement(
                                '/bin/gzip', ['/bin/gzip', '-cd'])
                        else:
                            raise Exception(
                                'Unknown compression type %s for file %s/%s' %
                                (compressionType, unitInput.inputDirectoryName,
                                 fileName))
                        completePipleline = [compressionElement
                                             ] + completePipleline[:]

                    logFileFd = guerillabackup.secureOpenAt(
                        inputDirectoryFd,
                        fileName,
                        fileOpenFlags=os.O_RDONLY | os.O_NOFOLLOW
                        | os.O_NOCTTY)
                    logFileStatData = os.fstat(logFileFd)
                    # By wrapping the logFileFd into this object, the first
                    # pipeline element will close it, so we do not need to
                    # close it here.
                    logFileOutput = TransformationProcessOutputStream(
                        logFileFd)

                    sinkHandle = sink.getSinkHandle(sourceInfo.sourceUrl)
                    sinkStream = sinkHandle.getSinkStream()

                    # Get the list of started pipeline instances.
                    pipelineInstances = guerillabackup.instantiateTransformationPipeline(
                        completePipleline,
                        logFileOutput,
                        sinkStream,
                        doStartFlag=True)
                    guerillabackup.runTransformationPipeline(pipelineInstances)
                    digestData = pipelineInstances[-1].getDigestData()

                    metaInfoDict = {}
                    metaInfoDict['BackupType'] = 'full'
                    if unitInput.handlingPolicyName != None:
                        metaInfoDict['HandlingPolicy'] = [
                            unitInput.handlingPolicyName
                        ]
                    lastUuid = self.resourceUuidMap.get(
                        sourceInfo.sourceUrl, None)
                    currentUuidDigest = hashlib.sha512()
                    if lastUuid != None:
                        metaInfoDict['Predecessor'] = lastUuid
                        currentUuidDigest.update(lastUuid)
                    # Add the compressed file digest. The consequence is that
                    # it will not be completely obvious when the same file was
                    # processed twice with encryption enabled and processing
                    # failed in a late phase. Therefore identical file content
                    # cannot be detected.
                    currentUuidDigest.update(digestData)
                    # Also include the timestamp and original filename of the source
                    # file in the UUID calculation: Otherwise retransmissions of files
                    # with identical content cannot be distinguished.
                    currentUuidDigest.update(
                        bytes('%d %s' % (logFileStatData.st_mtime, fileName),
                              sys.getdefaultencoding()))
                    currentUuid = currentUuidDigest.digest()
                    metaInfoDict['DataUuid'] = currentUuid
                    metaInfoDict['StorageFileChecksumSha512'] = digestData
                    metaInfoDict['Timestamp'] = int(logFileStatData.st_mtime)

                    metaInfo = BackupElementMetainfo(metaInfoDict)
                    sinkHandle.close(metaInfo)
                    if self.testModeFlag:
                        raise Exception(
                            'No completion of logfile backup in test mode')
                    # Delete the logfile.
                    os.unlink(fileName, dir_fd=inputDirectoryFd)

                    # Update the UUID map as last step: if any of the steps above
                    # would fail, currentUuid generated in next run will be identical
                    # to this. Sorting out the duplicates will be easy.
                    self.resourceUuidMap[sourceInfo.sourceUrl] = currentUuid
        finally:
            if inputDirectoryFd != None:
                os.close(inputDirectoryFd)
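
The octal constants tested above are Linux open(2) flag words as exposed via /proc: 0o100000 is O_LARGEFILE (a plain read-only open) and 0o100001 adds O_WRONLY. A looser sketch of the same writer detection using only the access-mode bits (the mask value is an assumption for Linux):

O_ACCMODE = 0o3  # low two bits: O_RDONLY=0, O_WRONLY=1, O_RDWR=2

def mayBeOpenForWriting(fdOpenFlags):
    """Return True unless the flags describe a read-only open."""
    return (fdOpenFlags & O_ACCMODE) != 0

assert mayBeOpenForWriting(0o100001)      # writer still active
assert not mayBeOpenForWriting(0o100000)  # plain read-only open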
Example #12
    def __init__(self, unitName, configContext):
        """Initialize this unit using the given configuration."""
        self.unitName = unitName
        self.configContext = configContext
        # This is the maximum interval in seconds between two invocations.
        # When last invocation was more than that number of seconds in
        # the past, the unit will attempt invocation at first possible
        # moment.
        self.maxInvocationInterval = 3600
        # When this value is not zero, the unit will attempt to trigger
        # invocation always at the same time using this value as modulus.
        self.moduloInvocationUnit = 3600
        # This is the invocation offset when modulus timing is enabled.
        self.moduloInvocationTime = 0
        # As immediate invocation cannot be guaranteed, this value defines
        # the size of the window, within that the unit should still be
        # invoked, even when the targeted time slot has already passed
        # by.
        self.moduloInvocationTimeWindow = 10

        self.testModeFlag = configContext.get(
            guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY, False)
        if not isinstance(self.testModeFlag, bool):
            raise Exception('Configuration parameter %s has to be ' \
                'boolean' % guerillabackup.CONFIG_GENERAL_DEBUG_TEST_MODE_KEY)

        # Timestamp of last invocation end.
        self.lastInvocationTime = -1
        # Map from resource name to UUID of most recent file processed.
        # The UUID is kept internally as binary data string. Only for
        # persistency, data will be base64 encoded.
        self.resourceUuidMap = {}
        self.persistencyDirFd = guerillabackup.openPersistencyFile(
            configContext, os.path.join('generators', self.unitName),
            os.O_DIRECTORY | os.O_RDONLY | os.O_CREAT | os.O_EXCL
            | os.O_NOFOLLOW | os.O_NOCTTY, 0o700)

        handle = None
        try:
            handle = guerillabackup.secureOpenAt(self.persistencyDirFd,
                                                 'state.current',
                                                 fileOpenFlags=os.O_RDONLY
                                                 | os.O_NOFOLLOW | os.O_NOCTTY)
        except OSError as openError:
            if openError.errno != errno.ENOENT:
                raise
        # See if the state.previous file exists; if yes, the unit is
        # likely to be broken. Refuse to do anything while in this state.
        try:
            os.stat('state.previous',
                    dir_fd=self.persistencyDirFd,
                    follow_symlinks=False)
            raise Exception(
                'Persistency data inconsistencies: found stale previous state file'
            )
        except OSError as statError:
            if statError.errno != errno.ENOENT:
                raise
        # So there is only the current state file, if any.
        stateInfo = None
        if handle != None:
            stateData = b''
            while True:
                data = os.read(handle, 1 << 20)
                if len(data) == 0:
                    break
                stateData += data
            os.close(handle)
            stateInfo = json.loads(str(stateData, 'ascii'))
            if ((not isinstance(stateInfo, list)) or (len(stateInfo) != 2)
                    or (not isinstance(stateInfo[0], int))
                    or (not isinstance(stateInfo[1], dict))):
                raise Exception('Persistency data structure mismatch')
            self.lastInvocationTime = stateInfo[0]
            self.resourceUuidMap = stateInfo[1]
            for url, uuidData in self.resourceUuidMap.items():
                self.resourceUuidMap[url] = base64.b64decode(uuidData)
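
The modulo timing fields initialized above describe a slot scheme that can be exercised in isolation; a sketch of the arithmetic they imply (the helper is an assumption, not part of the unit's API):

def nextModuloInvocationDelta(currentTime, unit=3600, offset=0, window=10):
    """Return seconds until the slot at offset modulo unit; a slot
    missed by less than window seconds still counts as due now."""
    phase = (currentTime - offset) % unit
    if phase < window:
        return 0
    return unit - phase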