def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning("Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) output.info("Done")
def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning( "Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error( "Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) output.info("Done")
def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ # Run the pre_wal_delete_script if present. script = HookScriptRunner(self, "wal_delete_script", "pre") script.env_from_wal_info(wal_info) script.run() # Run the pre_wal_delete_retry_script if present. retry_script = RetryHookScriptRunner(self, "wal_delete_retry_script", "pre") retry_script.env_from_wal_info(wal_info) retry_script.run() error = None try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the trailing directories; # if this fails it means that hashdir is not empty. pass except OSError as e: error = "Ignoring deletion of WAL file %s for server %s: %s" % ( wal_info.name, self.config.name, e, ) output.warning(error) # Run the post_wal_delete_retry_script if present. try: retry_script = RetryHookScriptRunner(self, "wal_delete_retry_script", "post") retry_script.env_from_wal_info(wal_info, None, error) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-wal-delete " "retry hook script: %s", e.hook.exit_status, e.hook.script, ) # Run the post_wal_delete_script if present. script = HookScriptRunner(self, "wal_delete_script", "post") script.env_from_wal_info(wal_info, None, error) script.run()
def _validate_with_keys(config_items, allowed_keys, section): """ Check every config parameter against a list of allowed keys :param config_items: list of tuples containing provided parameters along with their values :param allowed_keys: list of allowed keys :param section: source section (for error reporting) """ for parameter in config_items: # if the parameter name is not in the list of allowed values, # then output a warning name = parameter[0] if name not in allowed_keys: output.warning('Invalid configuration option "%s" in [%s] ' "section.", name, section)
def _validate_with_keys(config_items, allowed_keys, section): """ Check every config parameter against a list of allowed keys :param config_items: list of tuples containing provided parameters along with their values :param allowed_keys: list of allowed keys :param section: source section (for error reporting) """ for parameter in config_items: # if the parameter name is not in the list of allowed values, # then output a warning name = parameter[0] if name not in allowed_keys: output.warning('Invalid configuration option "%s" in [%s] ' 'section.', name, section)
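# A minimal, self-contained sketch of the key-validation pattern used by
# _validate_with_keys above. It reads a section with configparser and warns
# about unknown options; the section name, the allowed-key list and the use
# of warnings.warn (instead of barman's output module) are illustrative only.
import configparser
import warnings

ALLOWED_KEYS = ["backup_directory", "retention_policy"]  # hypothetical

def validate_section(parser, section, allowed_keys):
    # parser.items(section) yields (name, value) tuples, like config_items above
    for name, _value in parser.items(section):
        if name not in allowed_keys:
            warnings.warn('Invalid configuration option "%s" in [%s] section.'
                          % (name, section))

if __name__ == "__main__":
    cfg = configparser.ConfigParser()
    cfg.read_string("[main]\nbackup_directory = /srv/barman\nretention = 7d\n")
    validate_section(cfg, "main", ALLOWED_KEYS)  # warns about "retention"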
def invoke_parser(self, key, source, value, new_value): """ Function used for parsing configuration values. If needed, it uses special parsers from the PARSERS map, and handles parsing exceptions. Uses two values (value and new_value) to manage configuration hierarchy (server config overwrites global config). :param str key: the name of the configuration option :param str source: the section that contains the configuration option :param value: the old value of the option if present. :param str new_value: the new value that needs to be parsed :return: the parsed value of a configuration option """ # If the new value is None, returns the old value if new_value is None: return value # If we have a parser for the current key, use it to obtain the # actual value. If an exception is thrown, print a warning and # ignore the value. # noinspection PyBroadException if key in self.PARSERS: parser = self.PARSERS[key] try: # If the parser is a subclass of the CsvOption class # we need a different invocation, which passes not only # the value to the parser, but also the key name # and the section that contains the configuration if inspect.isclass(parser) and issubclass(parser, CsvOption): value = parser(new_value, key, source) else: value = parser(new_value) except Exception as e: output.warning( "Ignoring invalid configuration value '%s' for key %s in %s: %s", new_value, key, source, e, ) else: value = new_value return value
def test_warning(self, caplog): # preparation writer = self._mock_writer() msg = "test message" output.warning(msg) # logging test for record in caplog.records: assert record.levelname == "WARNING" assert record.name == __name__ assert msg in caplog.text # writer test assert not writer.error_occurred.called writer.warning.assert_called_once_with(msg) # global status test assert not output.error_occurred
def test_warning_error(self, caplog): # preparation writer = self._mock_writer() msg = 'test message' output.warning(msg, is_error=True) # logging test for record in caplog.records: assert record.levelname == 'WARNING' assert record.name == __name__ assert msg in caplog.text # writer test writer.error_occurred.assert_called_once_with() writer.warning.assert_called_once_with(msg) # global status test assert output.error_occurred
def test_warning_error(self, caplog): # preparation writer = self._mock_writer() msg = 'test message' output.warning(msg, is_error=True) # logging test for record in caplog.records: assert record.levelname == 'WARNING' assert record.name == __name__ assert msg in caplog.text # writer test writer.error_occurred.assert_called_once_with() writer.warning.assert_called_once_with(msg) # global status test assert output.error_occurred
def invoke_parser(self, key, source, value, new_value): """ Function used for parsing configuration values. If needed, it uses special parsers from the PARSERS map, and handles parsing exceptions. Uses two values (value and new_value) to manage configuration hierarchy (server config overwrites global config). :param str key: the name of the configuration option :param str source: the section that contains the configuration option :param value: the old value of the option if present. :param str new_value: the new value that needs to be parsed :return: the parsed value of a configuration option """ # If the new value is None, returns the old value if new_value is None: return value # If we have a parser for the current key, use it to obtain the # actual value. If an exception is thrown, print a warning and # ignore the value. # noinspection PyBroadException if key in self.PARSERS: parser = self.PARSERS[key] try: # If the parser is a subclass of the CsvOption class # we need a different invocation, which passes not only # the value to the parser, but also the key name # and the section that contains the configuration if inspect.isclass(parser) \ and issubclass(parser, CsvOption): value = parser(new_value, key, source) else: value = parser(new_value) except Exception as e: output.warning("Invalid configuration value '%s' for key %s" " in %s: %s", new_value, key, source, e) _logger.exception(e) else: value = new_value return value
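# A self-contained sketch of the parser-dispatch idea behind invoke_parser:
# a PARSERS map takes option names to callables, and class-based parsers that
# need context (like CsvOption above) also receive the key and the source
# section. The option names and the CsvLikeOption class are hypothetical
# stand-ins, not barman's actual parsers.
import inspect

class CsvLikeOption(set):
    """Toy stand-in for CsvOption: splits a comma-separated value."""
    def __init__(self, value, key, source):
        super().__init__(item.strip() for item in value.split(","))

PARSERS = {
    "basebackup_retry_times": int,       # plain callable
    "backup_options": CsvLikeOption,     # class-based, needs key/source
}

def parse_option(key, source, old_value, new_value):
    if new_value is None:
        return old_value
    parser = PARSERS.get(key)
    if parser is None:
        return new_value
    try:
        if inspect.isclass(parser):
            return parser(new_value, key, source)
        return parser(new_value)
    except Exception as exc:
        # on a parse error, keep the previous value and report the new one
        print("Ignoring invalid value %r for %s in %s: %s"
              % (new_value, key, source, exc))
        return old_value

print(parse_option("basebackup_retry_times", "[main]", 0, "3"))     # -> 3
print(parse_option("backup_options", "[main]", None, "a, b"))       # -> {'a', 'b'}
print(parse_option("basebackup_retry_times", "[main]", 0, "oops"))  # keeps 0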
def test_warning_with_args(self, caplog): # preparation writer = self._mock_writer() msg = 'test format %02d %s' args = (1, '2nd') output.warning(msg, *args) # logging test for record in caplog.records: assert record.levelname == 'WARNING' assert record.name == __name__ assert msg % args in caplog.text # writer test assert not writer.error_occurred.called writer.warning.assert_called_once_with(msg, *args) # global status test assert not output.error_occurred
def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the trailing directories; # if this fails it means that hashdir is not empty. pass except OSError as e: output.warning( 'Ignoring deletion of WAL file %s ' 'for server %s: %s', wal_info.name, self.config.name, e)
def test_warning_with_args(self, caplog): # preparation writer = self._mock_writer() msg = 'test format %02d %s' args = (1, '2nd') output.warning(msg, *args) # logging test for record in caplog.records: assert record.levelname == 'WARNING' assert record.name == __name__ assert msg % args in caplog.text # writer test assert not writer.error_occurred.called writer.warning.assert_called_once_with(msg, *args) # global status test assert not output.error_occurred
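# A tiny illustration of the "%s"-style deferred formatting exercised by the
# test above: output.warning(msg, *args) mirrors the stdlib logging
# convention, where the message is only interpolated when it is emitted.
import logging

logging.basicConfig(level=logging.WARNING)
logging.warning("test format %02d %s", 1, "2nd")  # -> "test format 01 2nd"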
def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the trailing directories; # if this fails it means that hashdir is not empty. pass except OSError as e: output.warning('Ignoring deletion of WAL file %s ' 'for server %s: %s', wal_info.name, self.config.name, e)
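# A small, self-contained illustration of the unlink/removedirs pattern in
# delete_wal above: os.removedirs() keeps pruning now-empty parent
# directories and raises OSError as soon as it meets a non-empty one, which
# is why that exception is silenced. Paths here are temporary and made up.
import os
import tempfile

root = tempfile.mkdtemp(prefix="wal-demo-")
hashdir = os.path.join(root, "0000000100000000")
os.makedirs(hashdir)
segment = os.path.join(hashdir, "000000010000000000000001")
open(segment, "w").close()

os.unlink(segment)
try:
    # Removes hashdir, then its parents, stopping with OSError when a
    # directory is not empty (or cannot be removed).
    os.removedirs(hashdir)
except OSError:
    pass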
def retry_backup_copy(self, target_function, *args, **kwargs): """ Execute the target backup copy function, retrying the configured number of times :param target_function: the base backup target function :param args: args for the target function :param kwargs: kwargs of the target function :return: the result of the target function """ attempts = 0 while True: try: # if it is not the first attempt, output the retry number if attempts >= 1: output.warning("Copy of base backup: retry #%s", attempts) # execute the target function for backup copy return target_function(*args, **kwargs) # catch rsync errors except DataTransferFailure as e: # exit condition: if retry number is lower than configured retry # limit, try again; otherwise exit. if attempts < self.config.basebackup_retry_times: # Log the exception, for debugging purpose _logger.exception("Failure in base backup copy: %s", e) output.warning( "Copy of base backup failed, waiting for next " "attempt in %s seconds", self.config.basebackup_retry_sleep) # sleep for configured time. then try again time.sleep(self.config.basebackup_retry_sleep) attempts += 1 else: # if the max number of attempts is reached and there is still # an error, exit re-raising the exception. raise
def retry_backup_copy(self, target_function, *args, **kwargs): """ Execute the target backup copy function, retrying the configured number of times :param target_function: the base backup target function :param args: args for the target function :param kwargs: kwargs of the target function :return: the result of the target function """ attempts = 0 while True: try: # if it is not the first attempt, output the retry number if attempts >= 1: output.warning("Copy of base backup: retry #%s", attempts) # execute the target function for backup copy return target_function(*args, **kwargs) # catch rsync errors except DataTransferFailure as e: # exit condition: if retry number is lower than configured # retry limit, try again; otherwise exit. if attempts < self.config.basebackup_retry_times: # Log the exception, for debugging purpose _logger.exception("Failure in base backup copy: %s", e) output.warning( "Copy of base backup failed, waiting for next " "attempt in %s seconds", self.config.basebackup_retry_sleep) # sleep for configured time. then try again time.sleep(self.config.basebackup_retry_sleep) attempts += 1 else: # if the max number of attempts is reached and # there is still an error, exit re-raising the exception. raise
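# A generic, self-contained sketch of the retry-with-sleep loop implemented
# by retry_backup_copy above. The retry limit, the sleep time and the flaky
# function are hypothetical stand-ins for the configured values and the
# rsync-based copy.
import time

RETRY_TIMES = 2
RETRY_SLEEP = 0.1  # seconds

class TransferError(Exception):
    pass

def retry_copy(target_function, *args, **kwargs):
    attempts = 0
    while True:
        try:
            if attempts >= 1:
                print("copy retry #%s" % attempts)
            return target_function(*args, **kwargs)
        except TransferError:
            if attempts < RETRY_TIMES:
                time.sleep(RETRY_SLEEP)
                attempts += 1
            else:
                # give up and let the caller handle the failure
                raise

calls = {"n": 0}
def flaky_copy():
    calls["n"] += 1
    if calls["n"] < 3:
        raise TransferError("simulated rsync failure")
    return "done"

print(retry_copy(flaky_copy))  # succeeds on the third attempt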
def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete :return bool: True if deleted, False if could not delete the backup """ available_backups = self.get_available_backups( status_filter=(BackupInfo.DONE, )) minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning( "Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return False # Keep track of when the delete operation started. delete_start_time = datetime.datetime.now() # Run the pre_delete_script if present. script = HookScriptRunner(self, 'delete_script', 'pre') script.env_from_backup_info(backup) script.run() # Run the pre_delete_retry_script if present. retry_script = RetryHookScriptRunner(self, 'delete_retry_script', 'pre') retry_script.env_from_backup_info(backup) retry_script.run() output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return False # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup timelines_to_protect = set() # If remove_until is not set there are no backup left if remove_until: # Retrieve the list of extra timelines that contains at least # a backup. On such timelines we don't want to delete any WAL for value in self.get_available_backups( BackupInfo.STATUS_ARCHIVING).values(): # Ignore the backup that is being deleted if value == backup: continue timelines_to_protect.add(value.timeline) # Remove the timeline of `remove_until` from the list. # We have enough information to safely delete unused WAL files # on it. 
timelines_to_protect -= set([remove_until.timeline]) output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until, timelines_to_protect): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error( "Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return False self.backup_cache_remove(backup) # Save the time of the complete removal of the backup delete_end_time = datetime.datetime.now() output.info( "Deleted backup %s (start time: %s, elapsed time: %s)", backup.backup_id, delete_start_time.ctime(), human_readable_timedelta(delete_end_time - delete_start_time)) # Remove the sync lockfile if exists sync_lock = ServerBackupSyncLock(self.config.barman_lock_directory, self.config.name, backup.backup_id) if os.path.exists(sync_lock.filename): _logger.debug("Deleting backup sync lockfile: %s" % sync_lock.filename) os.unlink(sync_lock.filename) # Run the post_delete_retry_script if present. try: retry_script = RetryHookScriptRunner(self, 'delete_retry_script', 'post') retry_script.env_from_backup_info(backup) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-delete " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post_delete_script if present. script = HookScriptRunner(self, 'delete_script', 'post') script.env_from_backup_info(backup) script.run() return True
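# A self-contained illustration of the timelines_to_protect computation in
# delete_backup above: when the oldest backup is removed, WAL files are only
# purged on timelines that no remaining backup still needs. Backup ids and
# timelines here are made up.
def timelines_to_protect(remaining_backups, remove_until_timeline):
    # remaining_backups: iterable of (backup_id, timeline) still in the catalog
    protected = {timeline for _backup_id, timeline in remaining_backups}
    # The timeline of `remove_until` can be cleaned up to that backup, so it
    # is not protected as a whole.
    protected -= {remove_until_timeline}
    return protected

remaining = [("20240101T000000", 1), ("20240201T000000", 2)]
print(timelines_to_protect(remaining, 2))  # -> {1}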
def recover(self, backup_info, dest, tablespaces=None, remote_command=None, target_tli=None, target_time=None, target_xid=None, target_name=None, target_immediate=False, exclusive=False, target_action=None, standby_mode=None): """ Performs a recovery of a backup This method should be called in a closing context :param barman.infofile.BackupInfo backup_info: the backup to recover :param str dest: the destination directory :param dict[str,str]|None tablespaces: a tablespace name -> location map (for relocation) :param str|None remote_command: The remote command to recover the base backup, in case of remote backup. :param str|None target_tli: the target timeline :param str|None target_time: the target time :param str|None target_xid: the target xid :param str|None target_name: the target name created previously with pg_create_restore_point() function call :param str|None target_immediate: end recovery as soon as consistency is reached :param bool exclusive: whether the recovery is exclusive or not :param str|None target_action: The recovery target action :param bool|None standby_mode: standby mode """ # Run the cron to be sure the wal catalog is up to date # Prepare a map that contains all the objects required for a recovery recovery_info = self._setup(backup_info, remote_command, dest) output.info("Starting %s restore for server %s using backup %s", recovery_info['recovery_dest'], self.server.config.name, backup_info.backup_id) output.info("Destination directory: %s", dest) if remote_command: output.info("Remote command: %s", remote_command) # If the backup we are recovering is still not validated and we # haven't requested the get-wal feature, display a warning message if not recovery_info['get_wal']: if backup_info.status == BackupInfo.WAITING_FOR_WALS: output.warning( "IMPORTANT: You have requested a recovery operation for " "a backup that does not have yet all the WAL files that " "are required for consistency.") # Set targets for PITR self._set_pitr_targets(recovery_info, backup_info, dest, target_name, target_time, target_tli, target_xid, target_immediate, target_action) # Retrieve the safe_horizon for smart copy self._retrieve_safe_horizon(recovery_info, backup_info, dest) # check destination directory. If doesn't exist create it try: recovery_info['cmd'].create_dir_if_not_exists(dest) except FsOperationFailed as e: output.error( "unable to initialise destination directory " "'%s': %s", dest, e) output.close_and_exit() # Initialize tablespace directories if backup_info.tablespaces: self._prepare_tablespaces(backup_info, recovery_info['cmd'], dest, tablespaces) # Copy the base backup output.info("Copying the base backup.") try: self._backup_copy(backup_info, dest, tablespaces, remote_command, recovery_info['safe_horizon']) except DataTransferFailure as e: output.error("Failure copying base backup: %s", e) output.close_and_exit() # Copy the backup.info file in the destination as # ".barman-recover.info" if remote_command: try: recovery_info['rsync'](backup_info.filename, ':%s/.barman-recover.info' % dest) except CommandFailedException as e: output.error('copy of recovery metadata file failed: %s', e) output.close_and_exit() else: backup_info.save(os.path.join(dest, '.barman-recover.info')) # Standby mode is not available for PostgreSQL older than 9.0 if backup_info.version < 90000 and standby_mode: raise RecoveryStandbyModeException( 'standby_mode is available only from PostgreSQL 9.0') # Restore the WAL segments. 
If GET_WAL option is set, skip this phase # as they will be retrieved using the wal-get command. if not recovery_info['get_wal']: # If the backup we restored is still waiting for WALS, read the # backup info again and check whether it has been validated. # Notify the user if it is still not DONE. if backup_info.status == BackupInfo.WAITING_FOR_WALS: data = BackupInfo(self.server, backup_info.filename) if data.status == BackupInfo.WAITING_FOR_WALS: output.warning( "IMPORTANT: The backup we have recovered IS NOT " "VALID. Required WAL files for consistency are " "missing. Please verify that WAL archiving is " "working correctly or evaluate using the 'get-wal' " "option for recovery") output.info("Copying required WAL segments.") try: # Retrieve a list of required log files required_xlog_files = tuple( self.server.get_required_xlog_files( backup_info, target_tli, recovery_info['target_epoch'])) # Restore WAL segments into the wal_dest directory self._xlog_copy(required_xlog_files, recovery_info['wal_dest'], remote_command) except DataTransferFailure as e: output.error("Failure copying WAL files: %s", e) output.close_and_exit() except BadXlogSegmentName as e: output.error( "invalid xlog segment name %r\n" "HINT: Please run \"barman rebuild-xlogdb %s\" " "to solve this issue", force_str(e), self.config.name) output.close_and_exit() # If WAL files are put directly in the pg_xlog directory, # avoid shipping of just recovered files # by creating the corresponding archive status file if not recovery_info['is_pitr']: output.info("Generating archive status files") self._generate_archive_status(recovery_info, remote_command, required_xlog_files) # Generate recovery.conf file (only if needed by PITR or get_wal) is_pitr = recovery_info['is_pitr'] get_wal = recovery_info['get_wal'] if is_pitr or get_wal or standby_mode: output.info("Generating recovery.conf") self._generate_recovery_conf(recovery_info, backup_info, dest, target_immediate, exclusive, remote_command, target_name, target_time, target_tli, target_xid, standby_mode) # Create archive_status directory if necessary archive_status_dir = os.path.join(recovery_info['wal_dest'], 'archive_status') try: recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir) except FsOperationFailed as e: output.error( "unable to create the archive_status directory " "'%s': %s", archive_status_dir, e) output.close_and_exit() # As last step, analyse configuration files in order to spot # harmful options. Barman performs automatic conversion of # some options as well as notifying users of their existence. # # This operation is performed in three steps: # 1) mapping # 2) analysis # 3) copy output.info("Identify dangerous settings in destination directory.") self._map_temporary_config_files(recovery_info, backup_info, remote_command) self._analyse_temporary_config_files(recovery_info) self._copy_temporary_config_files(dest, remote_command, recovery_info) return recovery_info
def _xlog_copy(self, required_xlog_files, wal_dest, remote_command): """ Restore WAL segments :param required_xlog_files: list of all required WAL files :param wal_dest: the destination directory for xlog recover :param remote_command: default None. The remote command to recover the xlog, in case of remote backup. """ # List of required WAL files partitioned by containing directory xlogs = collections.defaultdict(list) # add '/' suffix to ensure it is a directory wal_dest = '%s/' % wal_dest # Map of every compressor used with any WAL file in the archive, # to be used during this recovery compressors = {} compression_manager = self.backup_manager.compression_manager # Fill xlogs and compressors maps from required_xlog_files for wal_info in required_xlog_files: hashdir = xlog.hash_dir(wal_info.name) xlogs[hashdir].append(wal_info) # If a compressor is required, make sure it exists in the cache if wal_info.compression is not None and \ wal_info.compression not in compressors: compressors[wal_info.compression] = \ compression_manager.get_compressor( compression=wal_info.compression) rsync = RsyncPgData( path=self.server.path, ssh=remote_command, bwlimit=self.config.bandwidth_limit, network_compression=self.config.network_compression) # If compression is used and this is a remote recovery, we need a # temporary directory where to spool uncompressed files, # otherwise we either decompress every WAL file in the local # destination, or we ship the uncompressed file remotely if compressors: if remote_command: # Decompress to a temporary spool directory wal_decompression_dest = tempfile.mkdtemp( prefix='barman_xlog-') else: # Decompress directly to the destination directory wal_decompression_dest = wal_dest # Make sure wal_decompression_dest exists mkpath(wal_decompression_dest) else: # If no compression wal_decompression_dest = None if remote_command: # If remote recovery tell rsync to copy them remotely # add ':' prefix to mark it as remote wal_dest = ':%s' % wal_dest total_wals = sum(map(len, xlogs.values())) partial_count = 0 for prefix in sorted(xlogs): batch_len = len(xlogs[prefix]) partial_count += batch_len source_dir = os.path.join(self.config.wals_directory, prefix) _logger.info("Starting copy of %s WAL files %s/%s from %s to %s", batch_len, partial_count, total_wals, xlogs[prefix][0], xlogs[prefix][-1]) # If at least one compressed file has been found, activate # compression check and decompression for each WAL files if compressors: for segment in xlogs[prefix]: dst_file = os.path.join(wal_decompression_dest, segment.name) if segment.compression is not None: compressors[segment.compression].decompress( os.path.join(source_dir, segment.name), dst_file) else: shutil.copy2(os.path.join(source_dir, segment.name), dst_file) if remote_command: try: # Transfer the WAL files rsync.from_file_list( list(segment.name for segment in xlogs[prefix]), wal_decompression_dest, wal_dest) except CommandFailedException as e: msg = ("data transfer failure while copying WAL files " "to directory '%s'") % (wal_dest[1:], ) raise DataTransferFailure.from_command_error( 'rsync', e, msg) # Cleanup files after the transfer for segment in xlogs[prefix]: file_name = os.path.join(wal_decompression_dest, segment.name) try: os.unlink(file_name) except OSError as e: output.warning( "Error removing temporary file '%s': %s", file_name, e) else: try: rsync.from_file_list( list(segment.name for segment in xlogs[prefix]), "%s/" % os.path.join(self.config.wals_directory, prefix), wal_dest) except CommandFailedException as e: 
msg = "data transfer failure while copying WAL files " \ "to directory '%s'" % (wal_dest[1:],) raise DataTransferFailure.from_command_error( 'rsync', e, msg) _logger.info("Finished copying %s WAL files.", total_wals) # Remove local decompression target directory if different from the # destination directory (it happens when compression is in use during a # remote recovery if wal_decompression_dest and wal_decompression_dest != wal_dest: shutil.rmtree(wal_decompression_dest)
key, backup_info.pgdata) _logger.debug(self.current_action) continue self.current_action = "copying %s as outside %s directory" % ( key, backup_info.pgdata) _logger.info(self.current_action) try: rsync(':%s' % cf, backup_dest) except CommandFailedException as e: ret_code = e.args[0]['ret'] msg = "data transfer failure on file '%s'" % cf if 'ident_file' == key and ret_code == 23: # If the ident file is missing, # it isn't an error condition for PostgreSQL. # Barman is consistent with this behavior. output.warning(msg, log=True) continue else: raise DataTransferFailure.from_rsync_error(e, msg) # Check for any include directives in PostgreSQL configuration # Currently, include directives are not supported for files that # reside outside PGDATA. These files must be manually backed up. # Barman will emit a warning and list those files if backup_info.included_files: filtered_files = [ included_file for included_file in backup_info.included_files if not included_file.startswith(backup_info.pgdata) ] if len(filtered_files) > 0: output.warning(
def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning("Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup timelines_to_protect = set() # If remove_until is not set there are no backups left if remove_until: # Retrieve the list of extra timelines that contain at least # a backup. On such timelines we don't want to delete any WAL for value in self.get_available_backups( BackupInfo.STATUS_ARCHIVING).values(): # Ignore the backup that is being deleted if value == backup: continue timelines_to_protect.add(value.timeline) # Remove the timeline of `remove_until` from the list. # We have enough information to safely delete unused WAL files # on it. timelines_to_protect -= set([remove_until.timeline]) output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until, timelines_to_protect): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) output.info("Done")
"copying %s as outside %s directory" % ( key, backup_info.pgdata) _logger.info(self.current_action) try: rsync(':%s' % cf, backup_dest) except CommandFailedException, e: ret_code = e.args[0]['ret'] msg = "data transfer failure on file '%s'" % cf if 'ident_file' == key and ret_code == 23: # if the ident file is not present # it is not a blocking error, so, # we need to track why the exception is raised. # if ident file is missing, warn the user, log # the data transfer but continue the backup output.warning(msg, log=True) continue else: raise DataTransferFailure.from_rsync_error( e, msg) def reuse_dir(self, previous_backup_info, oid=None): """ If reuse_backup is 'copy' or 'link', builds the path of the directory to reuse, otherwise always returns None. If oid is None, it returns the full path of pgdata directory of the previous_backup otherwise it returns the path to the specified tablespace using it's oid. :param barman.infofile.BackupInfo previous_backup_info: backup to be
def backup_copy(self, backup_info): """ Perform the actual copy of the backup using Rsync. First, it copies one tablespace at a time, then the PGDATA directory, and finally configuration files (if outside PGDATA). Bandwidth limitation, according to configuration, is applied in the process. This method is the core of base backup copy using Rsync+Ssh. :param barman.infofile.BackupInfo backup_info: backup information """ # List of paths to be ignored by Rsync exclude_and_protect = [] # Retrieve the previous backup metadata, then set safe_horizon previous_backup = self.backup_manager.get_previous_backup( backup_info.backup_id) if previous_backup: # safe_horizon is a tz-aware timestamp because BackupInfo class # ensures it safe_horizon = previous_backup.begin_time else: # If no previous backup is present, safe_horizon is set to None safe_horizon = None # Copy tablespaces applying bwlimit when necessary if backup_info.tablespaces: tablespaces_bw_limit = self.config.tablespace_bandwidth_limit # Copy a tablespace at a time for tablespace in backup_info.tablespaces: self.current_action = "copying tablespace '%s'" % \ tablespace.name # Apply bandwidth limit if requested bwlimit = self.config.bandwidth_limit if tablespaces_bw_limit and \ tablespace.name in tablespaces_bw_limit: bwlimit = tablespaces_bw_limit[tablespace.name] if bwlimit: self.current_action += (" with bwlimit '%d'" % bwlimit) _logger.debug(self.current_action) # If the tablespace location is inside the data directory, # exclude and protect it from being copied twice during # the data directory copy if tablespace.location.startswith(backup_info.pgdata): exclude_and_protect.append( tablespace.location[len(backup_info.pgdata):]) # Make sure the destination directory exists in order for # smart copy to detect that no file is present there tablespace_dest = backup_info.get_data_directory( tablespace.oid) mkpath(tablespace_dest) # Exclude and protect the tablespace from being copied again # during the data directory copy exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid) # Copy the backup using smart_copy trying to reuse the # tablespace of the previous backup if incremental is active ref_dir = self._reuse_dir(previous_backup, tablespace.oid) tb_rsync = RsyncPgData( path=self.server.path, ssh=self.ssh_command, ssh_options=self.ssh_options, args=self._reuse_args(ref_dir), bwlimit=bwlimit, network_compression=self.config.network_compression, check=True) try: tb_rsync.smart_copy( ':%s/' % tablespace.location, tablespace_dest, safe_horizon, ref_dir) except CommandFailedException as e: msg = "data transfer failure on directory '%s'" % \ backup_info.get_data_directory(tablespace.oid) raise DataTransferFailure.from_rsync_error(e, msg) # Make sure the destination directory exists in order for smart copy # to detect that no file is present there backup_dest = backup_info.get_data_directory() mkpath(backup_dest) # Copy the PGDATA, trying to reuse the data dir # of the previous backup if incremental is active ref_dir = self._reuse_dir(previous_backup) rsync = RsyncPgData( path=self.server.path, ssh=self.ssh_command, ssh_options=self.ssh_options, args=self._reuse_args(ref_dir), bwlimit=self.config.bandwidth_limit, exclude_and_protect=exclude_and_protect, network_compression=self.config.network_compression) try: rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest, safe_horizon, ref_dir) except CommandFailedException as e: msg = "data transfer failure on directory '%s'" % \ backup_info.pgdata raise DataTransferFailure.from_rsync_error(e, 
msg) # At last copy pg_control try: rsync(':%s/global/pg_control' % (backup_info.pgdata,), '%s/global/pg_control' % (backup_dest,)) except CommandFailedException as e: msg = "data transfer failure on file '%s/global/pg_control'" % \ backup_info.pgdata raise DataTransferFailure.from_rsync_error(e, msg) # Copy configuration files (if not inside PGDATA) self.current_action = "copying configuration files" _logger.debug(self.current_action) for key in ('config_file', 'hba_file', 'ident_file'): cf = getattr(backup_info, key, None) if cf: assert isinstance(cf, str) # Consider only those that reside outside of the original # PGDATA directory if cf.startswith(backup_info.pgdata): self.current_action = \ "skipping %s as contained in %s directory" % ( key, backup_info.pgdata) _logger.debug(self.current_action) continue self.current_action = "copying %s as outside %s directory" % ( key, backup_info.pgdata) _logger.info(self.current_action) try: rsync(':%s' % cf, backup_dest) except CommandFailedException as e: ret_code = e.args[0]['ret'] msg = "data transfer failure on file '%s'" % cf if 'ident_file' == key and ret_code == 23: # If the ident file is missing, # it isn't an error condition for PostgreSQL. # Barman is consistent with this behavior. output.warning(msg, log=True) continue else: raise DataTransferFailure.from_rsync_error(e, msg) # Check for any include directives in PostgreSQL configuration # Currently, include directives are not supported for files that # reside outside PGDATA. These files must be manually backed up. # Barman will emit a warning and list those files if backup_info.included_files: filtered_files = [ included_file for included_file in backup_info.included_files if not included_file.startswith(backup_info.pgdata) ] if len(filtered_files) > 0: output.warning( "The usage of include directives is not supported " "for files that reside outside PGDATA.\n" "Please manually backup the following files:\n" "\t%s\n", "\n\t".join(filtered_files) )
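# A small, self-contained sketch of the exclude_and_protect bookkeeping in
# backup_copy above: tablespaces whose location lives inside PGDATA are
# copied once as tablespaces and must be skipped (and protected from
# deletion) during the PGDATA rsync pass. Paths and OIDs are illustrative.
def exclusions_for(pgdata, tablespaces):
    exclude_and_protect = []
    for oid, location in tablespaces:
        if location.startswith(pgdata):
            # Path of the tablespace relative to PGDATA
            exclude_and_protect.append(location[len(pgdata):])
        # The pg_tblspc entry is always excluded from the PGDATA copy
        exclude_and_protect.append("/pg_tblspc/%s" % oid)
    return exclude_and_protect

print(exclusions_for("/srv/pgdata",
                     [(16385, "/srv/pgdata/ts_inside"),
                      (16386, "/mnt/ts_outside")]))
# -> ['/ts_inside', '/pg_tblspc/16385', '/pg_tblspc/16386']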
def delete_backup(self, backup): """ Delete a backup :param backup: the backup to delete """ available_backups = self.get_available_backups() minimum_redundancy = self.server.config.minimum_redundancy # Honour minimum required redundancy if backup.status == BackupInfo.DONE and \ minimum_redundancy >= len(available_backups): output.warning( "Skipping delete of backup %s for server %s " "due to minimum redundancy requirements " "(minimum redundancy = %s, " "current redundancy = %s)", backup.backup_id, self.config.name, minimum_redundancy, len(available_backups)) return # Keep track of when the delete operation started. delete_start_time = datetime.datetime.now() output.info("Deleting backup %s for server %s", backup.backup_id, self.config.name) previous_backup = self.get_previous_backup(backup.backup_id) next_backup = self.get_next_backup(backup.backup_id) # Delete all the data contained in the backup try: self.delete_backup_data(backup) except OSError as e: output.error("Failure deleting backup %s for server %s.\n%s", backup.backup_id, self.config.name, e) return # Check if we are deleting the first available backup if not previous_backup: # In the case of exclusive backup (default), removes any WAL # files associated to the backup being deleted. # In the case of concurrent backup, removes only WAL files # prior to the start of the backup being deleted, as they # might be useful to any concurrent backup started immediately # after. remove_until = None # means to remove all WAL files if next_backup: remove_until = next_backup elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options: remove_until = backup timelines_to_protect = set() # If remove_until is not set there are no backups left if remove_until: # Retrieve the list of extra timelines that contain at least # a backup. On such timelines we don't want to delete any WAL for value in self.get_available_backups( BackupInfo.STATUS_ARCHIVING).values(): # Ignore the backup that is being deleted if value == backup: continue timelines_to_protect.add(value.timeline) # Remove the timeline of `remove_until` from the list. # We have enough information to safely delete unused WAL files # on it. timelines_to_protect -= set([remove_until.timeline]) output.info("Delete associated WAL segments:") for name in self.remove_wal_before_backup(remove_until, timelines_to_protect): output.info("\t%s", name) # As last action, remove the backup directory, # ending the delete operation try: self.delete_basebackup(backup) except OSError as e: output.error( "Failure deleting backup %s for server %s.\n%s\n" "Please manually remove the '%s' directory", backup.backup_id, self.config.name, e, backup.get_basebackup_directory()) return self.backup_cache_remove(backup) # Save the time of the complete removal of the backup delete_end_time = datetime.datetime.now() output.info( "Deleted backup %s (start time: %s, elapsed time: %s)", backup.backup_id, delete_start_time.ctime(), human_readable_timedelta(delete_end_time - delete_start_time))
def xlog_copy(self, required_xlog_files, wal_dest, remote_command): """ Restore WAL segments :param required_xlog_files: list of all required WAL files :param wal_dest: the destination directory for xlog recover :param remote_command: default None. The remote command to recover the xlog, in case of remote backup. """ # List of required WAL files partitioned by containing directory xlogs = collections.defaultdict(list) # add '/' suffix to ensure it is a directory wal_dest = '%s/' % wal_dest # Map of every compressor used with any WAL file in the archive, # to be used during this recovery compressors = {} compression_manager = self.backup_manager.compression_manager # Fill xlogs and compressors maps from required_xlog_files for wal_info in required_xlog_files: hashdir = xlog.hash_dir(wal_info.name) xlogs[hashdir].append(wal_info) # If a compressor is required, make sure it exists in the cache if wal_info.compression is not None and \ wal_info.compression not in compressors: compressors[wal_info.compression] = \ compression_manager.get_compressor( compression=wal_info.compression) rsync = RsyncPgData( path=self.server.path, ssh=remote_command, bwlimit=self.config.bandwidth_limit, network_compression=self.config.network_compression) # If compression is used and this is a remote recovery, we need a # temporary directory where to spool uncompressed files, # otherwise we either decompress every WAL file in the local # destination, or we ship the uncompressed file remotely if compressors: if remote_command: # Decompress to a temporary spool directory wal_decompression_dest = tempfile.mkdtemp( prefix='barman_xlog-') else: # Decompress directly to the destination directory wal_decompression_dest = wal_dest # Make sure wal_decompression_dest exists mkpath(wal_decompression_dest) else: # If no compression wal_decompression_dest = None if remote_command: # If remote recovery tell rsync to copy them remotely # add ':' prefix to mark it as remote wal_dest = ':%s' % wal_dest total_wals = sum(map(len, xlogs.values())) partial_count = 0 for prefix in sorted(xlogs): batch_len = len(xlogs[prefix]) partial_count += batch_len source_dir = os.path.join(self.config.wals_directory, prefix) _logger.info( "Starting copy of %s WAL files %s/%s from %s to %s", batch_len, partial_count, total_wals, xlogs[prefix][0], xlogs[prefix][-1]) # If at least one compressed file has been found, activate # compression check and decompression for each WAL files if compressors: for segment in xlogs[prefix]: dst_file = os.path.join(wal_decompression_dest, segment.name) if segment.compression is not None: compressors[segment.compression].decompress( os.path.join(source_dir, segment.name), dst_file) else: shutil.copy2(os.path.join(source_dir, segment.name), dst_file) if remote_command: try: # Transfer the WAL files rsync.from_file_list( list(segment.name for segment in xlogs[prefix]), wal_decompression_dest, wal_dest) except CommandFailedException as e: msg = ("data transfer failure while copying WAL files " "to directory '%s'") % (wal_dest[1:],) raise DataTransferFailure.from_rsync_error(e, msg) # Cleanup files after the transfer for segment in xlogs[prefix]: file_name = os.path.join(wal_decompression_dest, segment.name) try: os.unlink(file_name) except OSError as e: output.warning( "Error removing temporary file '%s': %s", file_name, e) else: try: rsync.from_file_list( list(segment.name for segment in xlogs[prefix]), "%s/" % os.path.join(self.config.wals_directory, prefix), wal_dest) except CommandFailedException as e: msg = "data 
transfer failure while copying WAL files " \ "to directory '%s'" % (wal_dest[1:],) raise DataTransferFailure.from_rsync_error(e, msg) _logger.info("Finished copying %s WAL files.", total_wals) # Remove local decompression target directory if different from the # destination directory (it happens when compression is in use during a # remote recovery) if wal_decompression_dest and wal_decompression_dest != wal_dest: shutil.rmtree(wal_decompression_dest)
def xlog_copy(self, required_xlog_files, wal_dest, remote_command): """ Restore WAL segments :param required_xlog_files: list of all required WAL files :param wal_dest: the destination directory for xlog recover :param remote_command: default None. The remote command to recover the xlog, in case of remote backup. """ # Retrieve the list of required WAL segments # according to recovery options xlogs = {} for wal_info in required_xlog_files: hashdir = xlog.hash_dir(wal_info.name) if hashdir not in xlogs: xlogs[hashdir] = [] xlogs[hashdir].append(wal_info.name) # Check decompression options compressor = self.backup_manager.compression_manager.get_compressor() rsync = RsyncPgData( ssh=remote_command, bwlimit=self.config.bandwidth_limit, network_compression=self.config.network_compression) if remote_command: # If remote recovery tell rsync to copy them remotely # add ':' prefix to mark it as remote # add '/' suffix to ensure it is a directory wal_dest = ':%s/' % wal_dest else: # we will not use rsync: destdir must exist mkpath(wal_dest) if compressor and remote_command: xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-') total_wals = sum(map(len, xlogs.values())) partial_count = 0 for prefix in sorted(xlogs): batch_len = len(xlogs[prefix]) partial_count += batch_len source_dir = os.path.join(self.config.wals_directory, prefix) _logger.info( "Starting copy of %s WAL files %s/%s from %s to %s", batch_len, partial_count, total_wals, xlogs[prefix][0], xlogs[prefix][-1]) if compressor: if remote_command: for segment in xlogs[prefix]: compressor.decompress(os.path.join(source_dir, segment), os.path.join(xlog_spool, segment)) try: rsync.from_file_list(xlogs[prefix], xlog_spool, wal_dest) except CommandFailedException as e: msg = "data transfer failure while copying WAL files " \ "to directory '%s'" % (wal_dest[1:],) raise DataTransferFailure.from_rsync_error(e, msg) # Cleanup files after the transfer for segment in xlogs[prefix]: file_name = os.path.join(xlog_spool, segment) try: os.unlink(file_name) except OSError as e: output.warning( "Error removing temporary file '%s': %s", file_name, e) else: # decompress directly to the right place for segment in xlogs[prefix]: compressor.decompress(os.path.join(source_dir, segment), os.path.join(wal_dest, segment)) else: try: rsync.from_file_list( xlogs[prefix], "%s/" % os.path.join( self.config.wals_directory, prefix), wal_dest) except CommandFailedException as e: msg = "data transfer failure while copying WAL files " \ "to directory '%s'" % (wal_dest[1:],) raise DataTransferFailure.from_rsync_error(e, msg)