def test_retry_hooks_with_retry(self, command_mock, sleep_mock):
    """Verify a retry hook script is re-run until it succeeds or aborts."""
    # Mocked BackupManager with a bogus pre hook configured
    backup_manager = build_backup_manager(name="test_server")
    backup_manager.config.pre_test_retry_hook = "not_existent_script"

    # Environment every hook invocation is expected to receive
    expected_env = {
        "BARMAN_PHASE": "pre",
        "BARMAN_VERSION": version,
        "BARMAN_SERVER": "test_server",
        "BARMAN_CONFIGURATION": "build_config_from_dicts",
        "BARMAN_HOOK": "test_retry_hook",
        "BARMAN_RETRY": "1",
    }

    # First scenario: three failures followed by a success
    command_mock.return_value.side_effect = [
        1,
        1,
        1,
        RetryHookScriptRunner.EXIT_SUCCESS,
    ]
    runner = RetryHookScriptRunner(backup_manager, "test_retry_hook", "pre")
    # Shorten wait time after failures so the test runs quickly
    runner.ATTEMPTS_BEFORE_NAP = 2
    runner.BREAK_TIME = 1
    runner.NAP_TIME = 1
    assert runner.run() == RetryHookScriptRunner.EXIT_SUCCESS
    assert command_mock.call_count == 4
    assert command_mock.call_args[1]["env_append"] == expected_env

    command_mock.reset_mock()

    # Second scenario: six failures, then the script asks to abort-continue
    command_mock.return_value.side_effect = [
        1,
        2,
        3,
        4,
        5,
        6,
        RetryHookScriptRunner.EXIT_ABORT_CONTINUE,
    ]
    runner = RetryHookScriptRunner(backup_manager, "test_retry_hook", "pre")
    # Shorten wait time after failures so the test runs quickly
    runner.ATTEMPTS_BEFORE_NAP = 2
    runner.BREAK_TIME = 1
    runner.NAP_TIME = 1
    assert runner.run() == RetryHookScriptRunner.EXIT_ABORT_CONTINUE
    assert command_mock.call_count == 7
    assert command_mock.call_args[1]["env_append"] == expected_env
def backup(self):
    """
    Performs a backup for the server.

    Creates the BackupInfo, runs the pre-backup (and retry) hook
    scripts, delegates the copy to the BackupExecutor, then records
    the final status. On any failure the backup is marked FAILED and
    the error is reported.
    """
    _logger.debug("initialising backup information")
    self.executor.init()
    backup_info = None
    try:
        # Create the BackupInfo object representing the backup
        backup_info = BackupInfo(
            self.server,
            backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
        backup_info.save()
        self.backup_cache_add(backup_info)
        output.info(
            "Starting backup for server %s in %s",
            self.config.name,
            backup_info.get_basebackup_directory())

        # Run the pre-backup-script if present.
        script = HookScriptRunner(self, 'backup_script', 'pre')
        script.env_from_backup_info(backup_info)
        script.run()

        # Run the pre-backup-retry-script if present.
        retry_script = RetryHookScriptRunner(
            self, 'backup_retry_script', 'pre')
        retry_script.env_from_backup_info(backup_info)
        retry_script.run()

        # Do the backup using the BackupExecutor
        self.executor.backup(backup_info)

        # Compute backup size and fsync it on disk
        self.backup_fsync_and_set_sizes(backup_info)

        # Mark the backup as DONE
        backup_info.set_attribute("status", "DONE")
    # Use BaseException instead of Exception to catch events like
    # KeyboardInterrupt (e.g.: CTRL-C)
    # FIX: "except BaseException, e" is Python 2-only syntax and a
    # SyntaxError on Python 3; the "as" form works on both.
    except BaseException as e:
        msg_lines = str(e).strip().splitlines()
        if backup_info:
            # Use only the first line of exception message
            # in backup_info error field
            backup_info.set_attribute("status", "FAILED")
            # If the exception has no attached message use the raw type name
            if len(msg_lines) == 0:
                msg_lines = [type(e).__name__]
            backup_info.set_attribute(
                "error",
                "failure %s (%s)" % (
                    self.executor.current_action, msg_lines[0]))
        output.error("Backup failed %s.\nDETAILS: %s\n%s",
                     self.executor.current_action, msg_lines[0],
                     '\n'.join(msg_lines[1:]))
def delete_wal(self, wal_info): """ Delete a WAL segment, with the given WalFileInfo :param barman.infofile.WalFileInfo wal_info: the WAL to delete """ # Run the pre_wal_delete_script if present. script = HookScriptRunner(self, "wal_delete_script", "pre") script.env_from_wal_info(wal_info) script.run() # Run the pre_wal_delete_retry_script if present. retry_script = RetryHookScriptRunner(self, "wal_delete_retry_script", "pre") retry_script.env_from_wal_info(wal_info) retry_script.run() error = None try: os.unlink(wal_info.fullpath(self.server)) try: os.removedirs(os.path.dirname(wal_info.fullpath(self.server))) except OSError: # This is not an error condition # We always try to remove the the trailing directories, # this means that hashdir is not empty. pass except OSError as e: error = "Ignoring deletion of WAL file %s for server %s: %s" % ( wal_info.name, self.config.name, e, ) output.warning(error) # Run the post_wal_delete_retry_script if present. try: retry_script = RetryHookScriptRunner(self, "wal_delete_retry_script", "post") retry_script.env_from_wal_info(wal_info, None, error) retry_script.run() except AbortedRetryHookScript as e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning( "Ignoring stop request after receiving " "abort (exit code %d) from post-wal-delete " "retry hook script: %s", e.hook.exit_status, e.hook.script, ) # Run the post_wal_delete_script if present. script = HookScriptRunner(self, "wal_delete_script", "post") script.env_from_wal_info(wal_info, None, error) script.run()
def test_retry_hooks_with_retry(self, command_mock, sleep_mock):
    """Check that a failing retry hook is re-executed until it terminates."""
    backup_manager = build_backup_manager(name='test_server')
    backup_manager.config.pre_test_retry_hook = 'not_existent_script'

    # Environment expected on every hook invocation
    expected_env = {
        'BARMAN_PHASE': 'pre',
        'BARMAN_VERSION': version,
        'BARMAN_SERVER': 'test_server',
        'BARMAN_CONFIGURATION': 'build_config_from_dicts',
        'BARMAN_HOOK': 'test_retry_hook',
        'BARMAN_RETRY': '1',
    }

    # Each scenario: (exit codes the mocked command returns, final result)
    scenarios = [
        ([1, 1, 1, RetryHookScriptRunner.EXIT_SUCCESS],
         RetryHookScriptRunner.EXIT_SUCCESS),
        ([1, 2, 3, 4, 5, 6, RetryHookScriptRunner.EXIT_ABORT_CONTINUE],
         RetryHookScriptRunner.EXIT_ABORT_CONTINUE),
    ]
    for exit_codes, expected_result in scenarios:
        command_mock.reset_mock()
        command_mock.return_value.side_effect = exit_codes
        runner = RetryHookScriptRunner(
            backup_manager, 'test_retry_hook', 'pre')
        # Shorten the wait times so the test runs quickly
        runner.ATTEMPTS_BEFORE_NAP = 2
        runner.BREAK_TIME = 1
        runner.NAP_TIME = 1
        assert runner.run() == expected_result
        assert command_mock.call_count == len(exit_codes)
        assert command_mock.call_args[1]['env_append'] == expected_env
def test_retry_hooks_with_retry(self, command_mock, sleep_mock):
    """The runner keeps retrying until SUCCESS or ABORT_CONTINUE is returned."""
    manager = build_backup_manager(name='test_server')
    manager.config.pre_test_retry_hook = 'not_existent_script'

    # Same environment is appended on every attempt
    env = {
        'BARMAN_PHASE': 'pre',
        'BARMAN_VERSION': version,
        'BARMAN_SERVER': 'test_server',
        'BARMAN_CONFIGURATION': 'build_config_from_dicts',
        'BARMAN_HOOK': 'test_retry_hook',
        'BARMAN_RETRY': '1',
    }

    def make_runner():
        # Build a runner with shortened wait times after failures
        r = RetryHookScriptRunner(manager, 'test_retry_hook', 'pre')
        r.ATTEMPTS_BEFORE_NAP = 2
        r.BREAK_TIME = 1
        r.NAP_TIME = 1
        return r

    # Three failed attempts, then success
    command_mock.return_value.side_effect = [
        1, 1, 1, RetryHookScriptRunner.EXIT_SUCCESS]
    assert make_runner().run() == RetryHookScriptRunner.EXIT_SUCCESS
    assert command_mock.call_count == 4
    assert command_mock.call_args[1]['env_append'] == env

    command_mock.reset_mock()

    # Six failed attempts, then an abort-continue request
    command_mock.return_value.side_effect = [
        1, 2, 3, 4, 5, 6, RetryHookScriptRunner.EXIT_ABORT_CONTINUE]
    assert make_runner().run() == RetryHookScriptRunner.EXIT_ABORT_CONTINUE
    assert command_mock.call_count == 7
    assert command_mock.call_args[1]['env_append'] == env
def test_retry_hook_abort(self, command_mock, sleep_mock):
    """EXIT_ABORT_STOP from a retry hook must raise AbortedRetryHookScript."""
    backup_manager = build_backup_manager(name="test_server")
    backup_manager.config.pre_test_retry_hook = "not_existent_script"
    # The mocked command always reports the ABORT_STOP exit code
    command_mock.return_value.return_value = RetryHookScriptRunner.EXIT_ABORT_STOP
    runner = RetryHookScriptRunner(backup_manager, "test_retry_hook", "pre")
    with pytest.raises(AbortedRetryHookScript) as excinfo:
        assert runner.run() == RetryHookScriptRunner.EXIT_ABORT_STOP
    expected_message = (
        "Abort 'pre_test_retry_hook' retry hook script "
        "(not_existent_script, exit code: 63)"
    )
    assert str(excinfo.value) == expected_message
def test_retry_hook_abort(self, command_mock, sleep_mock):
    """An ABORT_STOP exit code aborts the run with AbortedRetryHookScript."""
    manager = build_backup_manager(name='test_server')
    manager.config.pre_test_retry_hook = 'not_existent_script'
    # Every invocation of the mocked command returns ABORT_STOP
    command_mock.return_value.return_value = \
        RetryHookScriptRunner.EXIT_ABORT_STOP
    runner = RetryHookScriptRunner(manager, 'test_retry_hook', 'pre')
    with pytest.raises(AbortedRetryHookScript) as excinfo:
        assert runner.run() == RetryHookScriptRunner.EXIT_ABORT_STOP
    message = ("Abort 'pre_test_retry_hook' retry hook script "
               "(not_existent_script, exit code: 63)")
    assert str(excinfo.value) == message
def test_retry_hooks(self, command_mock, sleep_mock):
    """A retry hook that succeeds immediately is executed exactly once."""
    backup_manager = build_backup_manager(name="test_server")
    backup_manager.config.pre_test_retry_hook = "not_existent_script"
    # The mocked command always reports success
    command_mock.return_value.return_value = 0
    runner = RetryHookScriptRunner(backup_manager, "test_retry_hook", "pre")
    env = {
        "BARMAN_PHASE": "pre",
        "BARMAN_VERSION": version,
        "BARMAN_SERVER": "test_server",
        "BARMAN_CONFIGURATION": "build_config_from_dicts",
        "BARMAN_HOOK": "test_retry_hook",
        "BARMAN_RETRY": "1",
    }
    assert runner.run() == 0
    assert command_mock.call_count == 1
    assert command_mock.call_args[1]["env_append"] == env
def test_retry_hooks(self, command_mock, sleep_mock):
    """A successful retry hook runs once with the expected environment."""
    manager = build_backup_manager(name='test_server')
    manager.config.pre_test_retry_hook = 'not_existent_script'
    # Immediate success from the mocked command
    command_mock.return_value.return_value = 0
    runner = RetryHookScriptRunner(manager, 'test_retry_hook', 'pre')
    assert runner.run() == 0
    # Exactly one invocation, carrying the full hook environment
    assert command_mock.call_count == 1
    assert command_mock.call_args[1]['env_append'] == {
        'BARMAN_PHASE': 'pre',
        'BARMAN_VERSION': version,
        'BARMAN_SERVER': 'test_server',
        'BARMAN_CONFIGURATION': 'build_config_from_dicts',
        'BARMAN_HOOK': 'test_retry_hook',
        'BARMAN_RETRY': '1',
    }
else: output.info("Backup end at xlog location: %s (%s, %08X)", backup_info.end_xlog, backup_info.end_wal, backup_info.end_offset) output.info("Backup completed") finally: if backup_info: backup_info.save() # Run the post-backup-retry-script if present. try: retry_script = RetryHookScriptRunner( self, 'backup_retry_script', 'post') retry_script.env_from_backup_info(backup_info) retry_script.run() except AbortedRetryHookScript, e: # Ignore the ABORT_STOP as it is a post-hook operation _logger.warning("Ignoring stop request after receiving " "abort (exit code %d) from post-backup " "retry hook script: %s", e.hook.exit_status, e.hook.script) # Run the post-backup-script if present. script = HookScriptRunner(self, 'backup_script', 'post') script.env_from_backup_info(backup_info) script.run() output.result('backup', backup_info) def recover(self, backup_info, dest, tablespaces=None, target_tli=None,
def backup(self):
    """
    Performs a backup for the server

    Runs the pre-backup hook scripts, delegates the copy to the
    BackupExecutor, creates a restore point on success, and always runs
    the post-backup hook scripts from the finally block.
    """
    _logger.debug("initialising backup information")
    self.executor.init()
    backup_info = None
    try:
        # Create the BackupInfo object representing the backup
        backup_info = BackupInfo(
            self.server,
            backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
        backup_info.save()
        self.backup_cache_add(backup_info)
        output.info(
            "Starting backup using %s method for server %s in %s",
            self.mode,
            self.config.name,
            backup_info.get_basebackup_directory())

        # Run the pre-backup-script if present.
        script = HookScriptRunner(self, 'backup_script', 'pre')
        script.env_from_backup_info(backup_info)
        script.run()

        # Run the pre-backup-retry-script if present.
        retry_script = RetryHookScriptRunner(
            self, 'backup_retry_script', 'pre')
        retry_script.env_from_backup_info(backup_info)
        retry_script.run()

        # Do the backup using the BackupExecutor
        self.executor.backup(backup_info)

        # Compute backup size and fsync it on disk
        self.backup_fsync_and_set_sizes(backup_info)

        # Mark the backup as DONE
        backup_info.set_attribute("status", "DONE")
    # Use BaseException instead of Exception to catch events like
    # KeyboardInterrupt (e.g.: CTRL-C)
    except BaseException as e:
        msg_lines = str(e).strip().splitlines()
        if backup_info:
            # Use only the first line of exception message
            # in backup_info error field
            backup_info.set_attribute("status", "FAILED")
            # If the exception has no attached message use the raw
            # type name
            if len(msg_lines) == 0:
                msg_lines = [type(e).__name__]
            backup_info.set_attribute(
                "error",
                "failure %s (%s)" % (
                    self.executor.current_action, msg_lines[0]))
        output.error("Backup failed %s.\nDETAILS: %s\n%s",
                     self.executor.current_action, msg_lines[0],
                     '\n'.join(msg_lines[1:]))
    else:
        output.info("Backup end at xlog location: %s (%s, %08X)",
                    backup_info.end_xlog,
                    backup_info.end_wal,
                    backup_info.end_offset)
        output.info("Backup completed")
        # Create a restore point after a backup
        target_name = 'barman_%s' % backup_info.backup_id
        self.server.postgres.create_restore_point(target_name)
    finally:
        if backup_info:
            backup_info.save()

            # Make sure we are not holding any PostgreSQL connection
            # during the post-backup scripts
            self.server.close()

            # Run the post-backup-retry-script if present.
            try:
                retry_script = RetryHookScriptRunner(
                    self, 'backup_retry_script', 'post')
                retry_script.env_from_backup_info(backup_info)
                retry_script.run()
            except AbortedRetryHookScript as e:
                # Ignore the ABORT_STOP as it is a post-hook operation
                _logger.warning("Ignoring stop request after receiving "
                                "abort (exit code %d) from post-backup "
                                "retry hook script: %s",
                                e.hook.exit_status, e.hook.script)

            # Run the post-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'post')
            script.env_from_backup_info(backup_info)
            script.run()

    output.result('backup', backup_info)
def recover(self, backup_info, dest, tablespaces=None, remote_command=None,
            **kwargs):
    """
    Performs a recovery of a backup

    :param barman.infofile.LocalBackupInfo backup_info: the backup
        to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command
        to recover the base backup, in case of remote backup.
    :kwparam str|None target_tli: the target timeline
    :kwparam str|None target_time: the target time
    :kwparam str|None target_xid: the target xid
    :kwparam str|None target_lsn: the target LSN
    :kwparam str|None target_name: the target name created previously with
        pg_create_restore_point() function call
    :kwparam bool|None target_immediate: end recovery as soon as
        consistency is reached
    :kwparam bool exclusive: whether the recovery is exclusive or not
    :kwparam str|None target_action: default None. The recovery target
        action
    :kwparam bool|None standby_mode: the standby mode if needed
    """
    # Archive every WAL files in the incoming directory of the server
    self.server.archive_wal(verbose=False)
    # Delegate the recovery operation to a RecoveryExecutor object
    executor = RecoveryExecutor(self)

    # Run the pre_recovery_script if present.
    script = HookScriptRunner(self, 'recovery_script', 'pre')
    script.env_from_recover(
        backup_info, dest, tablespaces, remote_command, **kwargs)
    script.run()

    # Run the pre_recovery_retry_script if present.
    retry_script = RetryHookScriptRunner(
        self, 'recovery_retry_script', 'pre')
    retry_script.env_from_recover(
        backup_info, dest, tablespaces, remote_command, **kwargs)
    retry_script.run()

    # Execute the recovery.
    # We use a closing context to automatically remove
    # any resource eventually allocated during recovery.
    with closing(executor):
        recovery_info = executor.recover(
            backup_info, dest,
            tablespaces=tablespaces,
            remote_command=remote_command,
            **kwargs)

    # Run the post_recovery_retry_script if present.
    try:
        retry_script = RetryHookScriptRunner(
            self, 'recovery_retry_script', 'post')
        retry_script.env_from_recover(
            backup_info, dest, tablespaces, remote_command, **kwargs)
        retry_script.run()
    except AbortedRetryHookScript as e:
        # Ignore the ABORT_STOP as it is a post-hook operation
        _logger.warning(
            "Ignoring stop request after receiving "
            "abort (exit code %d) from post-recovery "
            "retry hook script: %s", e.hook.exit_status, e.hook.script)

    # Run the post-recovery-script if present.
    script = HookScriptRunner(self, 'recovery_script', 'post')
    script.env_from_recover(
        backup_info, dest, tablespaces, remote_command, **kwargs)
    script.run()

    # Output recovery results
    output.result('recovery', recovery_info['results'])
def backup(self, wait=False, wait_timeout=None):
    """
    Performs a backup for the server

    :param bool wait: wait for all the required WAL files to be archived
    :param int|None wait_timeout: the time, in seconds, the WAL archive
        wait is allowed to take before giving up
    :return BackupInfo: the generated BackupInfo
    """
    _logger.debug("initialising backup information")
    self.executor.init()
    backup_info = None
    try:
        # Create the BackupInfo object representing the backup
        backup_info = LocalBackupInfo(
            self.server,
            backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
        backup_info.set_attribute('systemid', self.server.systemid)
        backup_info.save()
        self.backup_cache_add(backup_info)
        output.info("Starting backup using %s method for server %s in %s",
                    self.mode,
                    self.config.name,
                    backup_info.get_basebackup_directory())

        # Run the pre-backup-script if present.
        script = HookScriptRunner(self, 'backup_script', 'pre')
        script.env_from_backup_info(backup_info)
        script.run()

        # Run the pre-backup-retry-script if present.
        retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                             'pre')
        retry_script.env_from_backup_info(backup_info)
        retry_script.run()

        # Do the backup using the BackupExecutor
        self.executor.backup(backup_info)

        # Create a restore point after a backup
        target_name = 'barman_%s' % backup_info.backup_id
        self.server.postgres.create_restore_point(target_name)

        # Free the Postgres connection
        self.server.postgres.close()

        # Compute backup size and fsync it on disk
        self.backup_fsync_and_set_sizes(backup_info)

        # Mark the backup as WAITING_FOR_WALS
        backup_info.set_attribute("status", BackupInfo.WAITING_FOR_WALS)
    # Use BaseException instead of Exception to catch events like
    # KeyboardInterrupt (e.g.: CTRL-C)
    except BaseException as e:
        msg_lines = force_str(e).strip().splitlines()
        # If the exception has no attached message use the raw
        # type name
        if len(msg_lines) == 0:
            msg_lines = [type(e).__name__]
        if backup_info:
            # Use only the first line of exception message
            # in backup_info error field
            backup_info.set_attribute("status", BackupInfo.FAILED)
            backup_info.set_attribute(
                "error",
                "failure %s (%s)" % (self.executor.current_action,
                                     msg_lines[0]))
        output.error("Backup failed %s.\nDETAILS: %s",
                     self.executor.current_action, '\n'.join(msg_lines))
    else:
        output.info("Backup end at LSN: %s (%s, %08X)",
                    backup_info.end_xlog,
                    backup_info.end_wal,
                    backup_info.end_offset)
        executor = self.executor
        output.info(
            "Backup completed (start time: %s, elapsed time: %s)",
            self.executor.copy_start_time,
            human_readable_timedelta(
                datetime.datetime.now() - executor.copy_start_time))
        # If requested, wait for end_wal to be archived
        if wait:
            try:
                self.server.wait_for_wal(backup_info.end_wal, wait_timeout)
                self.check_backup(backup_info)
            except KeyboardInterrupt:
                # Ignore CTRL-C pressed while waiting for WAL files
                output.info(
                    "Got CTRL-C. Continuing without waiting for '%s' "
                    "to be archived", backup_info.end_wal)
    finally:
        if backup_info:
            backup_info.save()

            # Make sure we are not holding any PostgreSQL connection
            # during the post-backup scripts
            self.server.close()

            # Run the post-backup-retry-script if present.
            try:
                retry_script = RetryHookScriptRunner(
                    self, 'backup_retry_script', 'post')
                retry_script.env_from_backup_info(backup_info)
                retry_script.run()
            except AbortedRetryHookScript as e:
                # Ignore the ABORT_STOP as it is a post-hook operation
                _logger.warning(
                    "Ignoring stop request after receiving "
                    "abort (exit code %d) from post-backup "
                    "retry hook script: %s",
                    e.hook.exit_status, e.hook.script)

            # Run the post-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'post')
            script.env_from_backup_info(backup_info)
            script.run()

    output.result('backup', backup_info)

    return backup_info
def delete_backup(self, backup):
    """
    Delete a backup

    Honours minimum redundancy, runs the pre/post delete hook scripts,
    removes backup data and (when safe) the associated WAL segments.

    :param backup: the backup to delete
    :return bool: True if deleted, False if could not delete the backup
    """
    available_backups = self.get_available_backups(
        status_filter=(BackupInfo.DONE, ))
    minimum_redundancy = self.server.config.minimum_redundancy
    # Honour minimum required redundancy
    if backup.status == BackupInfo.DONE and \
            minimum_redundancy >= len(available_backups):
        output.warning("Skipping delete of backup %s for server %s "
                       "due to minimum redundancy requirements "
                       "(minimum redundancy = %s, "
                       "current redundancy = %s)",
                       backup.backup_id,
                       self.config.name,
                       minimum_redundancy,
                       len(available_backups))
        return False
    # Keep track of when the delete operation started.
    delete_start_time = datetime.datetime.now()

    # Run the pre_delete_script if present.
    script = HookScriptRunner(self, 'delete_script', 'pre')
    script.env_from_backup_info(backup)
    script.run()

    # Run the pre_delete_retry_script if present.
    retry_script = RetryHookScriptRunner(self, 'delete_retry_script',
                                         'pre')
    retry_script.env_from_backup_info(backup)
    retry_script.run()

    output.info("Deleting backup %s for server %s",
                backup.backup_id, self.config.name)
    previous_backup = self.get_previous_backup(backup.backup_id)
    next_backup = self.get_next_backup(backup.backup_id)

    # Delete all the data contained in the backup
    try:
        self.delete_backup_data(backup)
    except OSError as e:
        output.error("Failure deleting backup %s for server %s.\n%s",
                     backup.backup_id, self.config.name, e)
        return False

    # Check if we are deleting the first available backup
    if not previous_backup:
        # In the case of exclusive backup (default), removes any WAL
        # files associated to the backup being deleted.
        # In the case of concurrent backup, removes only WAL files
        # prior to the start of the backup being deleted, as they
        # might be useful to any concurrent backup started immediately
        # after.
        remove_until = None  # means to remove all WAL files
        if next_backup:
            remove_until = next_backup
        elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
            remove_until = backup

        timelines_to_protect = set()
        # If remove_until is not set there are no backup left
        if remove_until:
            # Retrieve the list of extra timelines that contains at least
            # a backup. On such timelines we don't want to delete any WAL
            for value in self.get_available_backups(
                    BackupInfo.STATUS_ARCHIVING).values():
                # Ignore the backup that is being deleted
                if value == backup:
                    continue
                timelines_to_protect.add(value.timeline)
            # Remove the timeline of `remove_until` from the list.
            # We have enough information to safely delete unused WAL files
            # on it.
            timelines_to_protect -= set([remove_until.timeline])

        output.info("Delete associated WAL segments:")
        for name in self.remove_wal_before_backup(remove_until,
                                                  timelines_to_protect):
            output.info("\t%s", name)

    # As last action, remove the backup directory,
    # ending the delete operation
    try:
        self.delete_basebackup(backup)
    except OSError as e:
        output.error("Failure deleting backup %s for server %s.\n%s\n"
                     "Please manually remove the '%s' directory",
                     backup.backup_id, self.config.name, e,
                     backup.get_basebackup_directory())
        return False
    self.backup_cache_remove(backup)
    # Save the time of the complete removal of the backup
    delete_end_time = datetime.datetime.now()
    output.info("Deleted backup %s (start time: %s, elapsed time: %s)",
                backup.backup_id,
                delete_start_time.ctime(),
                human_readable_timedelta(
                    delete_end_time - delete_start_time))

    # Remove the sync lockfile if exists
    sync_lock = ServerBackupSyncLock(self.config.barman_lock_directory,
                                     self.config.name, backup.backup_id)
    if os.path.exists(sync_lock.filename):
        _logger.debug("Deleting backup sync lockfile: %s" %
                      sync_lock.filename)
        os.unlink(sync_lock.filename)

    # Run the post_delete_retry_script if present.
    try:
        retry_script = RetryHookScriptRunner(self, 'delete_retry_script',
                                             'post')
        retry_script.env_from_backup_info(backup)
        retry_script.run()
    except AbortedRetryHookScript as e:
        # Ignore the ABORT_STOP as it is a post-hook operation
        _logger.warning(
            "Ignoring stop request after receiving "
            "abort (exit code %d) from post-delete "
            "retry hook script: %s", e.hook.exit_status, e.hook.script)

    # Run the post_delete_script if present.
    script = HookScriptRunner(self, 'delete_script', 'post')
    script.env_from_backup_info(backup)
    script.run()

    return True
def backup(self):
    """
    Performs a backup for the server

    Runs the pre-backup hook scripts, delegates the copy to the
    BackupExecutor, creates a restore point on success, and always runs
    the post-backup hook scripts from the finally block.
    """
    _logger.debug("initialising backup information")
    self.executor.init()
    backup_info = None
    try:
        # Create the BackupInfo object representing the backup
        backup_info = BackupInfo(
            self.server,
            backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
        backup_info.save()
        self.backup_cache_add(backup_info)
        output.info("Starting backup using %s method for server %s in %s",
                    self.mode,
                    self.config.name,
                    backup_info.get_basebackup_directory())

        # Run the pre-backup-script if present.
        script = HookScriptRunner(self, 'backup_script', 'pre')
        script.env_from_backup_info(backup_info)
        script.run()

        # Run the pre-backup-retry-script if present.
        retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                             'pre')
        retry_script.env_from_backup_info(backup_info)
        retry_script.run()

        # Do the backup using the BackupExecutor
        self.executor.backup(backup_info)

        # Compute backup size and fsync it on disk
        self.backup_fsync_and_set_sizes(backup_info)

        # Mark the backup as DONE
        backup_info.set_attribute("status", "DONE")
    # Use BaseException instead of Exception to catch events like
    # KeyboardInterrupt (e.g.: CTRL-C)
    except BaseException as e:
        msg_lines = str(e).strip().splitlines()
        if backup_info:
            # Use only the first line of exception message
            # in backup_info error field
            backup_info.set_attribute("status", "FAILED")
            # If the exception has no attached message use the raw
            # type name
            if len(msg_lines) == 0:
                msg_lines = [type(e).__name__]
            backup_info.set_attribute(
                "error",
                "failure %s (%s)" % (self.executor.current_action,
                                     msg_lines[0]))
        output.error("Backup failed %s.\nDETAILS: %s\n%s",
                     self.executor.current_action, msg_lines[0],
                     '\n'.join(msg_lines[1:]))
    else:
        output.info("Backup end at LSN: %s (%s, %08X)",
                    backup_info.end_xlog,
                    backup_info.end_wal,
                    backup_info.end_offset)
        output.info(
            "Backup completed (start time: %s, elapsed time: %s)",
            self.executor.copy_start_time,
            human_readable_timedelta(self.executor.copy_end_time -
                                     self.executor.copy_start_time))
        # Create a restore point after a backup
        target_name = 'barman_%s' % backup_info.backup_id
        self.server.postgres.create_restore_point(target_name)
    finally:
        if backup_info:
            backup_info.save()

            # Make sure we are not holding any PostgreSQL connection
            # during the post-backup scripts
            self.server.close()

            # Run the post-backup-retry-script if present.
            try:
                retry_script = RetryHookScriptRunner(
                    self, 'backup_retry_script', 'post')
                retry_script.env_from_backup_info(backup_info)
                retry_script.run()
            except AbortedRetryHookScript as e:
                # Ignore the ABORT_STOP as it is a post-hook operation
                _logger.warning(
                    "Ignoring stop request after receiving "
                    "abort (exit code %d) from post-backup "
                    "retry hook script: %s",
                    e.hook.exit_status, e.hook.script)

            # Run the post-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'post')
            script.env_from_backup_info(backup_info)
            script.run()

    output.result('backup', backup_info)
def cron_wal_archival(self, compressor, wal_info):
    """
    Archive a WAL segment from the incoming directory.
    This function returns a WalFileInfo object.

    :param compressor: the compressor for the file (if any)
    :param wal_info: WalFileInfo of the WAL file is being processed
    """
    dest_file = wal_info.fullpath(self.server)
    dest_dir = os.path.dirname(dest_file)
    srcfile = os.path.join(self.config.incoming_wals_directory,
                           wal_info.name)
    error = None
    try:
        # Run the pre_archive_script if present.
        script = HookScriptRunner(self, 'archive_script', 'pre')
        script.env_from_wal_info(wal_info, srcfile)
        script.run()

        # Run the pre_archive_retry_script if present.
        retry_script = RetryHookScriptRunner(self, 'archive_retry_script',
                                             'pre')
        retry_script.env_from_wal_info(wal_info, srcfile)
        retry_script.run()

        mkpath(dest_dir)
        if compressor:
            compressor.compress(srcfile, dest_file)
            shutil.copystat(srcfile, dest_file)
            os.unlink(srcfile)
        else:
            shutil.move(srcfile, dest_file)

        # Execute fsync() on the archived WAL containing directory
        fsync_dir(dest_dir)
        # Execute fsync() also on the incoming directory
        fsync_dir(self.config.incoming_wals_directory)
        # Execute fsync() on the archived WAL file
        file_fd = os.open(dest_file, os.O_RDONLY)
        os.fsync(file_fd)
        os.close(file_fd)

        stat = os.stat(dest_file)
        wal_info.size = stat.st_size
        wal_info.compression = compressor and compressor.compression
    except Exception as e:
        # In case of failure save the exception for the post scripts
        error = e
        raise
    # Ensure the execution of the post_archive_retry_script and
    # the post_archive_script
    finally:
        # Run the post_archive_retry_script if present.
        try:
            retry_script = RetryHookScriptRunner(self,
                                                 'archive_retry_script',
                                                 'post')
            retry_script.env_from_wal_info(wal_info, dest_file, error)
            retry_script.run()
        # FIX: "except AbortedRetryHookScript, e" is Python 2-only syntax
        # and a SyntaxError on Python 3; the "as" form works on both.
        except AbortedRetryHookScript as e:
            # Ignore the ABORT_STOP as it is a post-hook operation
            _logger.warning("Ignoring stop request after receiving "
                            "abort (exit code %d) from post-archive "
                            "retry hook script: %s",
                            e.hook.exit_status, e.hook.script)

        # Run the post_archive_script if present.
        script = HookScriptRunner(self, 'archive_script', 'post', error)
        script.env_from_wal_info(wal_info, dest_file)
        script.run()
def archive_wal(self, compressor, wal_info):
    """
    Archive a WAL segment and update the wal_info object

    :param compressor: the compressor for the file (if any)
    :param WalFileInfo wal_info: the WAL file is being processed
    """
    src_file = wal_info.orig_filename
    src_dir = os.path.dirname(src_file)
    dst_file = wal_info.fullpath(self.server)
    tmp_file = dst_file + '.tmp'
    dst_dir = os.path.dirname(dst_file)
    error = None
    try:
        # Run the pre_archive_script if present.
        script = HookScriptRunner(self.backup_manager,
                                  'archive_script', 'pre')
        script.env_from_wal_info(wal_info, src_file)
        script.run()

        # Run the pre_archive_retry_script if present.
        retry_script = RetryHookScriptRunner(self.backup_manager,
                                             'archive_retry_script',
                                             'pre')
        retry_script.env_from_wal_info(wal_info, src_file)
        retry_script.run()

        # Check if destination already exists
        if os.path.exists(dst_file):
            src_uncompressed = src_file
            dst_uncompressed = dst_file
            dst_info = WalFileInfo.from_file(dst_file)
            try:
                comp_manager = self.backup_manager.compression_manager
                if dst_info.compression is not None:
                    dst_uncompressed = dst_file + '.uncompressed'
                    comp_manager.get_compressor(
                        compression=dst_info.compression).decompress(
                            dst_file, dst_uncompressed)
                if wal_info.compression:
                    src_uncompressed = src_file + '.uncompressed'
                    comp_manager.get_compressor(
                        compression=wal_info.compression).decompress(
                            src_file, src_uncompressed)
                # Directly compare files.
                # When the files are identical
                # raise a MatchingDuplicateWalFile exception,
                # otherwise raise a DuplicateWalFile exception.
                if filecmp.cmp(dst_uncompressed, src_uncompressed):
                    raise MatchingDuplicateWalFile(wal_info)
                else:
                    raise DuplicateWalFile(wal_info)
            finally:
                if src_uncompressed != src_file:
                    os.unlink(src_uncompressed)
                if dst_uncompressed != dst_file:
                    os.unlink(dst_uncompressed)

        mkpath(dst_dir)
        # Compress the file only if not already compressed
        if compressor and not wal_info.compression:
            compressor.compress(src_file, tmp_file)
            shutil.copystat(src_file, tmp_file)
            os.rename(tmp_file, dst_file)
            os.unlink(src_file)
            # Update wal_info
            stat = os.stat(dst_file)
            wal_info.size = stat.st_size
            wal_info.compression = compressor.compression
        else:
            # Try to atomically rename the file. If successful,
            # the renaming will be an atomic operation
            # (this is a POSIX requirement).
            try:
                os.rename(src_file, dst_file)
            except OSError:
                # Source and destination are probably on different
                # filesystems
                shutil.copy2(src_file, tmp_file)
                os.rename(tmp_file, dst_file)
                os.unlink(src_file)
        # At this point the original file has been removed
        wal_info.orig_filename = None

        # Execute fsync() on the archived WAL file
        file_fd = os.open(dst_file, os.O_RDONLY)
        os.fsync(file_fd)
        os.close(file_fd)
        # Execute fsync() on the archived WAL containing directory
        fsync_dir(dst_dir)
        # Execute fsync() also on the incoming directory
        fsync_dir(src_dir)
    except Exception as e:
        # In case of failure save the exception for the post scripts
        error = e
        raise
    # Ensure the execution of the post_archive_retry_script and
    # the post_archive_script
    finally:
        # Run the post_archive_retry_script if present.
        try:
            # FIX: pass the BackupManager (self.backup_manager) to the
            # hook runner, not the archiver itself, mirroring the
            # pre-archive hooks above.
            retry_script = RetryHookScriptRunner(self.backup_manager,
                                                 'archive_retry_script',
                                                 'post')
            retry_script.env_from_wal_info(wal_info, dst_file, error)
            retry_script.run()
        except AbortedRetryHookScript as e:
            # Ignore the ABORT_STOP as it is a post-hook operation
            _logger.warning("Ignoring stop request after receiving "
                            "abort (exit code %d) from post-archive "
                            "retry hook script: %s",
                            e.hook.exit_status, e.hook.script)

        # Run the post_archive_script if present.
        # FIX: same consistency fix as above — use self.backup_manager.
        script = HookScriptRunner(self.backup_manager,
                                  'archive_script', 'post', error)
        script.env_from_wal_info(wal_info, dst_file)
        script.run()
def archive_wal(self, compressor, wal_info):
    """
    Archive a single WAL segment and update the wal_info object.

    The segment is moved from its incoming location
    (``wal_info.orig_filename``) to its final destination inside the
    server's WAL archive, optionally compressing it on the way.  The
    pre/post ``archive_script`` and ``archive_retry_script`` hooks are
    executed around the operation; the post hooks always run, even on
    failure, and receive the captured exception.

    :param compressor: the compressor for the file (if any); when not
        ``None`` and the file is not already compressed, the segment is
        compressed into the archive
    :param WalFileInfo wal_info: the WAL file being processed

    :raises MatchingDuplicateWalFile: if an identical file is already
        present at the destination
    :raises DuplicateWalFile: if a different file with the same name is
        already present at the destination
    """
    src_file = wal_info.orig_filename
    src_dir = os.path.dirname(src_file)
    dst_file = wal_info.fullpath(self.server)
    tmp_file = dst_file + '.tmp'
    dst_dir = os.path.dirname(dst_file)
    error = None
    try:
        # Run the pre_archive_script if present.
        script = HookScriptRunner(self.backup_manager,
                                  'archive_script', 'pre')
        script.env_from_wal_info(wal_info, src_file)
        script.run()
        # Run the pre_archive_retry_script if present.
        retry_script = RetryHookScriptRunner(self.backup_manager,
                                             'archive_retry_script',
                                             'pre')
        retry_script.env_from_wal_info(wal_info, src_file)
        retry_script.run()
        # Check if destination already exists
        if os.path.exists(dst_file):
            src_uncompressed = src_file
            dst_uncompressed = dst_file
            dst_info = WalFileInfo.from_file(dst_file)
            try:
                # Decompress both sides (when needed) so the content
                # comparison below is meaningful regardless of the
                # compression used on either file.
                comp_manager = self.backup_manager.compression_manager
                if dst_info.compression is not None:
                    dst_uncompressed = dst_file + '.uncompressed'
                    comp_manager.get_compressor(
                        compression=dst_info.compression).decompress(
                            dst_file, dst_uncompressed)
                if wal_info.compression:
                    src_uncompressed = src_file + '.uncompressed'
                    comp_manager.get_compressor(
                        compression=wal_info.compression).decompress(
                            src_file, src_uncompressed)
                # Directly compare files.
                # When the files are identical
                # raise a MatchingDuplicateWalFile exception,
                # otherwise raise a DuplicateWalFile exception.
                if filecmp.cmp(dst_uncompressed, src_uncompressed):
                    raise MatchingDuplicateWalFile(wal_info)
                else:
                    raise DuplicateWalFile(wal_info)
            finally:
                # Remove any temporary uncompressed copy we created
                if src_uncompressed != src_file:
                    os.unlink(src_uncompressed)
                if dst_uncompressed != dst_file:
                    os.unlink(dst_uncompressed)
        mkpath(dst_dir)
        # Compress the file only if not already compressed
        if compressor and not wal_info.compression:
            compressor.compress(src_file, tmp_file)
            shutil.copystat(src_file, tmp_file)
            os.rename(tmp_file, dst_file)
            os.unlink(src_file)
            # Update wal_info with the size and compression of the
            # archived (compressed) file
            stat = os.stat(dst_file)
            wal_info.size = stat.st_size
            wal_info.compression = compressor.compression
        else:
            # Try to atomically rename the file. If successful,
            # the renaming will be an atomic operation
            # (this is a POSIX requirement).
            try:
                os.rename(src_file, dst_file)
            except OSError:
                # Source and destination are probably on different
                # filesystems
                shutil.copy2(src_file, tmp_file)
                os.rename(tmp_file, dst_file)
                os.unlink(src_file)
        # At this point the original file has been removed
        wal_info.orig_filename = None
        # Execute fsync() on the archived WAL file.
        # Use try/finally so the descriptor is closed even if
        # fsync() fails.
        file_fd = os.open(dst_file, os.O_RDONLY)
        try:
            os.fsync(file_fd)
        finally:
            os.close(file_fd)
        # Execute fsync() on the archived WAL containing directory
        fsync_dir(dst_dir)
        # Execute fsync() also on the incoming directory
        fsync_dir(src_dir)
    except Exception as e:
        # In case of failure save the exception for the post scripts
        error = e
        raise
    # Ensure the execution of the post_archive_retry_script and
    # the post_archive_script
    finally:
        # Run the post_archive_retry_script if present.
        # NOTE: the hook runners take the BackupManager, exactly as the
        # pre hooks above do (previously `self` was passed here, which
        # is not the backup manager).
        try:
            retry_script = RetryHookScriptRunner(self.backup_manager,
                                                 'archive_retry_script',
                                                 'post')
            retry_script.env_from_wal_info(wal_info, dst_file, error)
            retry_script.run()
        except AbortedRetryHookScript as e:
            # Ignore the ABORT_STOP as it is a post-hook operation
            _logger.warning("Ignoring stop request after receiving "
                            "abort (exit code %d) from post-archive "
                            "retry hook script: %s",
                            e.hook.exit_status, e.hook.script)
        # Run the post_archive_script if present.
        script = HookScriptRunner(self.backup_manager,
                                  'archive_script', 'post', error)
        script.env_from_wal_info(wal_info, dst_file)
        script.run()