def _getOutputFile(targetDir, database, compressMode):
    """
    Opens the output file used for saving the MySQL dump.

    The base filename is ``"mysqldump.txt"`` when no database is given, or
    ``"mysqldump-<database>.txt"`` otherwise.  A ``".gz"`` extension is added
    when ``compressMode`` is ``"gzip"``, a ``".bz2"`` extension is added when
    it is ``"bzip2"``, and any other mode produces an uncompressed file.

    Args:
       targetDir: Target directory to write file in
       database: Name of the database (if any)
       compressMode: Compress mode to be used for backed-up files
    Returns:
       Tuple of (Output file object, filename), file opened in binary mode for use with executeCommand()
    """
    baseName = "mysqldump.txt" if database is None else ("mysqldump-%s.txt" % database)
    filename = pathJoin(targetDir, baseName)

    # Pick the extension and the matching file constructor together
    if compressMode == "gzip":
        suffix, opener = ".gz", GzipFile
    elif compressMode == "bzip2":
        suffix, opener = ".bz2", BZ2File
    else:
        suffix, opener = "", open

    filename = "%s%s" % (filename, suffix)
    outputFile = opener(filename, "wb")
    logger.debug("MySQL dump file will be [%s].", filename)
    return (outputFile, filename)
def findDailyDirs(stagingDir, indicatorFile):
    """
    Returns a list of all daily staging directories that do not contain the
    indicated indicator file.

    The staging area is walked three levels deep (one level each for what the
    loop variables call year, month and daily directories), considering only
    real directories at each level (files and links are excluded).

    Args:
       stagingDir: Top-level staging directory to search
       indicatorFile: Name of the indicator file to look for in each daily directory
    Returns:
       List of absolute paths to daily staging directories
    """

    def listSubdirectories(parent):
        # Immediate children of parent, with files and links filtered out
        listing = FilesystemList()
        listing.excludeFiles = True
        listing.excludeLinks = True
        listing.addDirContents(path=parent, recursive=False, addSelf=False)
        return listing

    results = FilesystemList()
    for yearDir in listSubdirectories(stagingDir):
        for monthDir in listSubdirectories(yearDir):
            for dailyDir in listSubdirectories(monthDir):
                if not os.path.exists(pathJoin(dailyDir, indicatorFile)):
                    logger.debug("Adding [%s] to list of daily directories.", dailyDir)
                    results.append(dailyDir)  # just put it in the list, no fancy operations
                else:
                    logger.debug("Skipping directory [%s]; contains %s.", dailyDir, indicatorFile)
    return results
def _createStagingDirs(config, dailyDir, peers):
    """
    Creates staging directories as required.

    The main staging directory is the passed-in daily directory, something
    like ``staging/2002/05/23``.  Each peer then gets its own directory
    underneath it, i.e. ``staging/2002/05/23/host``.  Directories that already
    exist are left alone and a warning is logged.

    Args:
       config: Config object
       dailyDir: Daily staging directory
       peers: List of all configured peers
    Returns:
       Dictionary mapping peer name to staging directory
    Raises:
       Exception: If a staging directory cannot be created or have its ownership changed
    """
    if os.path.isdir(dailyDir):
        logger.warning("Staging directory [%s] already existed.", dailyDir)
    else:
        try:
            logger.debug("Creating staging directory [%s].", dailyDir)
            os.makedirs(dailyDir)
            # Fix ownership on the daily directory and on its two parent levels
            owned = [dailyDir, pathJoin(dailyDir, ".."), pathJoin(dailyDir, "..", "..")]
            for target in owned:
                changeOwnership(target, config.options.backupUser, config.options.backupGroup)
        except Exception as e:
            raise Exception("Unable to create staging directory: %s" % e)

    mapping = {}
    for peer in peers:
        peerDir = pathJoin(dailyDir, peer.name)
        mapping[peer.name] = peerDir
        if os.path.isdir(peerDir):
            logger.warning("Peer staging directory [%s] already existed.", peerDir)
            continue
        try:
            logger.debug("Creating peer staging directory [%s].", peerDir)
            os.makedirs(peerDir)
            changeOwnership(peerDir, config.options.backupUser, config.options.backupGroup)
        except Exception as e:
            raise Exception("Unable to create staging directory: %s" % e)
    return mapping
def _findRebuildDirs(config):
    """
    Finds the set of directories to be included in a disc rebuild.

    The rebuild action is supposed to recreate "last week's" disc.  This won't
    always be possible if some of the staging directories are missing.  The
    general procedure is to look back no further than the most recent
    "starting day of week" and collect every staging directory between then
    and today that still exists and has a stage indicator.

    Args:
       config: Config object
    Returns:
       Correct staging dir, as a dict mapping directory to date suffix
    Raises:
       IOError: If we do not find at least one staging directory
    """
    start = deriveDayOfWeek(config.options.startingDay)
    today = datetime.date.today()
    weekday = today.weekday()

    # Number of days to inspect, counting today and the starting day itself
    if weekday < start:
        days = 7 - (start - weekday) + 1
    else:
        days = weekday - start + 1

    stagingDirs = {}
    for offset in range(days):
        dateSuffix = (today - datetime.timedelta(days=offset)).strftime(DIR_TIME_FORMAT)
        stageDir = pathJoin(config.store.sourceDir, dateSuffix)
        indicator = pathJoin(stageDir, STAGE_INDICATOR)
        if os.path.isdir(stageDir) and os.path.exists(indicator):
            logger.info("Rebuild process will include stage directory [%s]", stageDir)
            stagingDirs[stageDir] = dateSuffix

    if not stagingDirs:
        raise IOError("Unable to find any staging directories for rebuild process.")
    return stagingDirs
def commandAvailable(command):
    """
    Indicates whether a command is available on $PATH somewhere.

    The ``PATH`` environment variable is split on ``os.pathsep`` (the
    separator between search-path entries, ``:`` on UNIX and ``;`` on
    Windows), and each entry is checked for a file named ``command``.

    Args:
       command: Command to search for
    Returns:
       Boolean true/false depending on whether command is available
    """
    searchPath = os.environ.get("PATH")
    if searchPath is None:
        return False
    # os.pathsep separates PATH entries; os.sep separates components *within* a path
    return any(os.path.exists(pathJoin(directory, command)) for directory in searchPath.split(os.pathsep))
def _getDigestPath(config, absolutePath):
    """
    Gets the digest path associated with a collect directory or file.

    The digest file lives in the configured working directory and is named
    after the normalized form of the path, with the digest extension appended.

    Args:
       config: Config object
       absolutePath: Absolute path to generate digest for
    Returns:
       Absolute path to the digest associated with the collect directory or file
    """
    digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
    digestPath = pathJoin(config.options.workingDir, digestName)
    logger.debug("Digest path is [%s]", digestPath)
    return digestPath
def removedir(tree):
    """
    Recursively removes an entire directory.

    The tree is walked bottom-up so that every directory is already empty by
    the time it is removed.  Symbolic links are unlinked rather than followed.

    Args:
       tree: Directory tree to remove
    Raises:
       ValueError: If a path cannot be encoded properly
    """
    tree = encodePath(tree)
    for parent, dirNames, fileNames in os.walk(tree, topdown=False):
        for fileName in fileNames:
            target = pathJoin(parent, fileName)
            if os.path.islink(target) or os.path.isfile(target):
                os.remove(target)
        for dirName in dirNames:
            target = pathJoin(parent, dirName)
            if os.path.islink(target):
                # A link to a directory is removed as a file, not descended into
                os.remove(target)
            elif os.path.isdir(target):
                os.rmdir(target)
    os.rmdir(tree)
def buildPath(components):
    """
    Builds a complete path from a list of components.

    For instance, constructs ``"/a/b/c"`` from ``["/a", "b", "c"]``.  The
    components are joined left to right, one at a time, and the result is
    encoded before being returned.

    Args:
       components: List of components
    Returns:
       String path constructed from components
    Raises:
       ValueError: If a path cannot be encoded properly
    """
    joined = components[0]
    for index in range(1, len(components)):
        joined = pathJoin(joined, components[index])
    return encodePath(joined)
def _getDailyDir(config):
    """
    Gets the daily staging directory.

    The result is ``config.stage.targetDir`` joined with the current local
    time formatted using ``DIR_TIME_FORMAT``, giving something in the form
    ``staging/YYYY/MM/DD``, i.e. ``staging/2000/10/07``.

    Args:
       config: Config object
    Returns:
       Path of daily staging directory
    """
    dateSuffix = time.strftime(DIR_TIME_FORMAT)
    dailyDir = pathJoin(config.stage.targetDir, dateSuffix)
    logger.debug("Daily staging directory is [%s].", dailyDir)
    return dailyDir
def consistencyCheck(config, stagingDirs):
    """
    Runs a consistency check against media in the backup device.

    It is sometimes possible to create a corrupted multisession disc (i.e.
    one that cannot be read) even though no errors were reported while
    writing it.  This check makes sure that the data read back from the disc
    matches the data that was used to create it.

    The device is mounted at a temporary mount point inside the working
    directory, and each staging directory is compared against the directory
    with the matching date suffix on the media.  If no exception is raised,
    the check passed; a positive confirmation is also logged with ``info``
    priority for each directory.

    @warning: The implementation of this function is very UNIX-specific.

    Args:
       config: Config object
       stagingDirs: Dictionary mapping directory path to date suffix
    Raises:
       ValueError: If the two directories are not equivalent
       IOError: If there is a problem working with the media
    """
    logger.debug("Running consistency check.")
    mountPoint = tempfile.mkdtemp(dir=config.options.workingDir)
    try:
        mount(config.store.devicePath, mountPoint, "iso9660")
        for stagingDir, dateSuffix in list(stagingDirs.items()):
            discDir = pathJoin(mountPoint, dateSuffix)
            logger.debug("Checking [%s] vs. [%s].", stagingDir, discDir)
            compareContents(stagingDir, discDir, verbose=True)
            logger.info("Consistency check completed for [%s].  No problems found.", stagingDir)
    finally:
        unmount(mountPoint, True, 5, 1)  # try 5 times, and remove mount point when done
def writeIndicatorFile(targetDir, indicatorFile, backupUser, backupGroup):
    """
    Writes an indicator file into a target directory.

    The indicator is an empty file; once written, its ownership is changed to
    the given backup user and group.  Any failure is logged and re-raised.

    Args:
       targetDir: Target directory in which to write indicator
       indicatorFile: Name of the indicator file
       backupUser: User that indicator file should be owned by
       backupGroup: Group that indicator file should be owned by
    Raises:
       Exception: If there is a problem writing the indicator file or changing its ownership
    """
    indicatorPath = pathJoin(targetDir, indicatorFile)
    logger.debug("Writing indicator file [%s].", indicatorPath)
    try:
        with open(indicatorPath, "w") as handle:  # pylint: disable=unspecified-encoding
            handle.write("")
        changeOwnership(indicatorPath, backupUser, backupGroup)
    except Exception as e:
        logger.error("Error writing [%s]: %s", indicatorPath, e)
        raise e
def findResources(resources, dataDirs):
    """
    Returns a dictionary of locations for various resources.

    Each resource is looked up in the data directories in order, and the
    first existing path wins.

    Args:
       resources: List of required resources
       dataDirs: List of data directories to search within for resources
    Returns:
       Dictionary mapping resource name to resource path
    Raises:
       Exception: If some resource cannot be found
    """
    mapping = {}
    for resource in resources:
        candidates = (pathJoin(resourceDir, resource) for resourceDir in dataDirs)
        # Lazily stop at the first candidate that exists on disk
        located = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
        if located is None:
            raise Exception("Unable to find resource [%s]." % resource)
        mapping[resource] = located
    return mapping
def _getTarfilePath(config, absolutePath, archiveMode): """ Gets the tarfile path (including correct extension) associated with a collect directory. Args: config: Config object absolutePath: Absolute path to generate tarfile for archiveMode: Archive mode to use for this tarfile Returns: Absolute path to the tarfile associated with the collect directory """ if archiveMode == "tar": extension = "tar" elif archiveMode == "targz": extension = "tar.gz" elif archiveMode == "tarbz2": extension = "tar.bz2" normalized = buildNormalizedPath(absolutePath) filename = "%s.%s" % (normalized, extension) tarfilePath = pathJoin(config.collect.targetDir, filename) logger.debug("Tarfile path is [%s]", tarfilePath) return tarfilePath
def _getExclusions(config, collectDir):
    """
    Gets exclusions (file and patterns) associated with a collect directory.

    The returned paths value is a list of absolute paths to be excluded from
    the backup for the directory.  It is built from the collect
    configuration's absolute exclude paths, the collect directory's absolute
    exclude paths, and the collect directory's relative exclude paths (each
    joined onto the collect directory's absolute path), in that order.

    The returned patterns value is a list of patterns to be excluded from the
    backup for the directory.  It is built from the collect configuration's
    patterns followed by the collect directory's own patterns.

    Args:
       config: Config object
       collectDir: Collect directory object
    Returns:
       Tuple (files, patterns) indicating what to exclude
    """
    paths = []
    for absolutePaths in (config.collect.absoluteExcludePaths, collectDir.absoluteExcludePaths):
        if absolutePaths is not None:
            paths.extend(absolutePaths)
    relativePaths = collectDir.relativeExcludePaths
    if relativePaths is not None:
        paths.extend(pathJoin(collectDir.absolutePath, relativePath) for relativePath in relativePaths)

    patterns = []
    for patternList in (config.collect.excludePatterns, collectDir.excludePatterns):
        if patternList is not None:
            patterns.extend(patternList)

    logger.debug("Exclude paths: %s", paths)
    logger.debug("Exclude patterns: %s", patterns)
    return (paths, patterns)
def _getOutputFile(targetDir, name, compress=True):
    """
    Opens the output file used for saving a dump to the filesystem.

    The file is created in the target directory and is called ``name.txt``,
    or ``name.txt.bz2`` (written through ``BZ2File``) when ``compress`` is
    true.

    Args:
       targetDir: Target directory to write file in
       name: Name of the file to create
       compress: Indicates whether to write compressed output
    Returns:
       Tuple of (Output file object, filename), file opened in binary mode for use with executeCommand()
    """
    filename = pathJoin(targetDir, "%s.txt" % name)
    if not compress:
        logger.debug("Dump file will be [%s].", filename)
        return (open(filename, "wb"), filename)
    filename = "%s.bz2" % filename
    logger.debug("Dump file will be [%s].", filename)
    return (BZ2File(filename, "wb"), filename)
def _findCorrectDailyDir(options, config):
    """
    Finds the correct daily staging directory to be written to disk.

    In Cedar Backup v1.0, the correct staging directory was assumed to match
    the current date.  That breaks down if collect is on one side of midnite
    and stage is on the other, or if certain processes span midnite.

    The current approach checks today's directory first.  If it is not usable,
    the directories for the day before and then the day after are tried.  A
    directory is usable only if it exists, has a stage indicator, and *has not
    yet been stored* according to the store indicator.  The first usable
    directory wins.  If a directory other than today's is chosen *and*
    ``config.store.warnMidnite`` is set, a warning is put in the log.

    There is one exception.  If the ``options.full`` flag is set, the "span
    midnite" logic is disabled and any existing store indicator is ignored, so
    only today's directory is considered.  This is because most users who run
    ``cback3 --full store`` twice in a row expect the command to generate two
    identical discs.  Without this exception, the second run could fail ("no
    unstored directory exists") or could even write a completely unexpected
    directory to disc (if some previous day's contents had not yet been
    written).

    Args:
       options: Options object
       config: Config object
    Returns:
       Correct staging dir, as a dict mapping directory to date suffix
    Raises:
       IOError: If the staging directory cannot be found
    """

    def isStaged(directory):
        # Directory exists and the stage step has marked it complete
        return os.path.isdir(directory) and os.path.exists(pathJoin(directory, STAGE_INDICATOR))

    def isStored(directory):
        # The store step has already consumed this directory
        return os.path.exists(pathJoin(directory, STORE_INDICATOR))

    oneDay = datetime.timedelta(days=1)
    today = datetime.date.today()
    yesterday = today - oneDay
    tomorrow = today + oneDay

    todayDate = today.strftime(DIR_TIME_FORMAT)
    todayPath = pathJoin(config.stage.targetDir, todayDate)

    if options.full:
        if isStaged(todayPath):
            logger.info("Store process will use current day's stage directory [%s]", todayPath)
            return {todayPath: todayDate}
        raise IOError("Unable to find staging directory to store (only tried today due to full option).")

    # (day, log message, whether choosing it means we crossed midnite), in priority order
    candidates = (
        (today, "Store process will use current day's stage directory [%s]", False),
        (yesterday, "Store process will use previous day's stage directory [%s]", True),
        (tomorrow, "Store process will use next day's stage directory [%s]", True),
    )
    for day, message, crossedMidnite in candidates:
        dateSuffix = day.strftime(DIR_TIME_FORMAT)
        stagePath = pathJoin(config.stage.targetDir, dateSuffix)
        if isStaged(stagePath) and not isStored(stagePath):
            logger.info(message, stagePath)
            if crossedMidnite and config.store.warnMidnite:
                logger.warning("Warning: store process crossed midnite boundary to find data.")
            return {stagePath: dateSuffix}

    raise IOError("Unable to find unused staging directory to store (tried today, yesterday, tomorrow).")
def addEntry(self, path, graftPoint=None, override=False, contentsOnly=False):
    """
    Adds an individual file or directory into the ISO image.

    The path must exist and must be a file or a directory (links are
    rejected).  By default, the entry is placed into the image at the root
    directory, but this can be overridden using the ``graftPoint`` parameter
    or instance variable.

    Use ``contentsOnly`` to revert to the "original" ``mkisofs`` behavior for
    adding directories, which is to add only the items within the directory,
    and not the directory itself.

    *Note:* Things get *odd* if you try to add a directory to an image that
    will be written to a multisession disc, and the same directory already
    exists in an earlier session on that disc.  Not all of the data gets
    written.  You really wouldn't want to do this anyway.

    *Note:* An exception will be thrown if the path has already been added to
    the image, unless the ``override`` parameter is set to ``True``.

    *Note:* The method ``graftPoint`` parameter overrides the object-wide
    instance variable.  If neither is set, the path will be written at the
    image root.  The graft point behavior is determined by the value in
    effect *at the time this method is called*, so you *must* set the
    object-wide value before calling this method for the first time, or your
    image may not be consistent.

    *Note:* You *cannot* use the local ``graftPoint`` parameter to "turn off"
    an object-wide instance variable by setting it to ``None``, because
    ``None`` is indistinguishable from the parameter's default.

    Args:
       path (String representing a path on disk): File or directory to be added to the image
       graftPoint (String representing a graft point path, as described above): Graft point to be used when adding this entry
       override (Boolean true/false): Override an existing entry with the same path
       contentsOnly (Boolean true/false): Add directory contents only (standard ``mkisofs`` behavior)
    Raises:
       ValueError: If path is not a file or directory, or does not exist
       ValueError: If the path has already been added, and override is not set
       ValueError: If a path cannot be encoded properly
    """
    path = encodePath(path)
    if not override and path in self.entries:
        raise ValueError("Path has already been added to the image.")
    if os.path.islink(path):
        raise ValueError("Path must not be a link.")

    isDirectory = os.path.isdir(path)
    if not isDirectory and not os.path.isfile(path):
        raise ValueError("Path must be a file or a directory.")

    # The method argument wins; otherwise fall back to the object-wide value (possibly None)
    effectiveGraft = graftPoint if graftPoint is not None else self.graftPoint

    if not isDirectory or contentsOnly:
        # Files, and directories added contents-only, land directly at the graft point
        self.entries[path] = effectiveGraft
    elif effectiveGraft is not None:
        self.entries[path] = pathJoin(effectiveGraft, os.path.basename(path))
    else:
        self.entries[path] = os.path.basename(path)
def testSplitDailyDir_002(self):
    """
    Test with 1.0 MB limit.

    Every file present before the split is expected to still be present
    afterwards.
    """
    expectedFiles = [
        ("system1", "file001.a.b"),
        ("system1", "file002"),
        ("system1", "file003"),
        ("system2", "file001"),
        ("system2", "file002"),
        ("system2", "file003"),
        ("system3", "file001"),
        ("system3", "file002"),
        ("system3", "file003"),
    ]
    self.extractTar("tree21")
    dailyDir = self.buildPath(["tree21", "2007", "01", "01"])
    self.assertTrue(os.path.exists(dailyDir) and os.path.isdir(dailyDir))
    for system, name in expectedFiles:
        self.assertTrue(os.path.exists(pathJoin(dailyDir, system, name)))
    sizeLimit = ByteQuantity("1.0", UNIT_MBYTES)
    splitSize = ByteQuantity("100000", UNIT_BYTES)
    _splitDailyDir(dailyDir, sizeLimit, splitSize, None, None)
    for system, name in expectedFiles:
        self.assertTrue(os.path.exists(pathJoin(dailyDir, system, name)))
def testSplitDailyDir_004(self):
    """
    Test with 99,999 byte limit, chopped down to 5,000 bytes
    """
    # (system, file, original size if the file is expected to be split, else None)
    fixtures = [
        ("system1", "file001.a.b", None),
        ("system1", "file002", None),
        ("system1", "file003", 320000),
        ("system2", "file001", None),
        ("system2", "file002", None),
        ("system2", "file003", 100000),
        ("system3", "file001", None),
        ("system3", "file002", 100000),
        ("system3", "file003", 100001),
    ]
    self.extractTar("tree21")
    dailyDir = self.buildPath(["tree21", "2007", "01", "01"])
    self.assertTrue(os.path.exists(dailyDir) and os.path.isdir(dailyDir))
    for system, name, _ in fixtures:
        self.assertTrue(os.path.exists(pathJoin(dailyDir, system, name)))
    sizeLimit = ByteQuantity("99999", UNIT_BYTES)
    splitSize = ByteQuantity("5000", UNIT_BYTES)
    _splitDailyDir(dailyDir, sizeLimit, splitSize, None, None)
    for system, name, originalSize in fixtures:
        stillThere = os.path.exists(pathJoin(dailyDir, system, name))
        if originalSize is None:
            self.assertTrue(stillThere)
        else:
            self.assertFalse(stillThere)
    for system, name, originalSize in fixtures:
        if originalSize is not None:
            self.checkSplit(pathJoin(dailyDir, system, name), originalSize, 5000)