def _generate_archive_status(self, recovery_info, remote_command,
                                 required_xlog_files):
        """
        Populate the archive_status directory

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param str remote_command: ssh command for remote connection
        :param tuple required_xlog_files: list of required WAL segments
        """
        if remote_command:
            status_dir = recovery_info['tempdir']
        else:
            status_dir = os.path.join(recovery_info['wal_dest'],
                                      'archive_status')
            mkpath(status_dir)
        for wal_info in required_xlog_files:
            with open(os.path.join(status_dir, "%s.done" % wal_info.name),
                      'a') as f:
                f.write('')
        if remote_command:
            try:
                recovery_info['rsync']('%s/' % status_dir,
                                       ':%s' % os.path.join(
                                           recovery_info['wal_dest'],
                                           'archive_status'))
            except CommandFailedException as e:
                output.error("unable to populate pg_xlog/archive_status "
                             "directory: %s", e)
                output.close_and_exit()
    def _copy_temporary_config_files(self, dest,
                                     remote_command, recovery_info):
        """
        Copy modified configuration files using rsync in case of
        remote recovery

        :param str dest: destination directory of the recovery
        :param str remote_command: ssh command for remote connection
        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        """
        if remote_command:
            # If this is a remote recovery, rsync the modified files from the
            # temporary local directory to the remote destination directory.
            file_list = []
            for conf_file in recovery_info['configuration_files']:
                file_list.append('%s' % conf_file)
                file_list.append('%s.origin' % conf_file)

            try:
                recovery_info['rsync'].from_file_list(file_list,
                                                      recovery_info['tempdir'],
                                                      ':%s' % dest)
            except CommandFailedException as e:
                output.error('remote copy of configuration files failed: %s',
                             e)
                output.close_and_exit()
Example #3
0
    def delete_backup(self, backup):
        """
        Delete a backup

        The deletion is skipped (with a warning) when the backup is in DONE
        status and the configured minimum redundancy is greater than or
        equal to the number of available backups.

        Otherwise the backup data is removed first, then (only when there is
        no previous backup) the associated WAL files, and finally the
        backup directory itself. An OSError during either removal step is
        reported through output.error and stops the operation.

        :param backup: the backup to delete
        """
        available_backups = self.get_available_backups()
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            # The arguments follow the order of the placeholders:
            # first the minimum redundancy, then the current redundancy
            output.warning("Skipping delete of backup %s for server %s "
                           "due to minimum redundancy requirements "
                           "(minimum redundancy = %s, "
                           "current redundancy = %s)",
                           backup.backup_id,
                           self.config.name,
                           minimum_redundancy,
                           len(available_backups))
            return

        output.info("Deleting backup %s for server %s",
                    backup.backup_id, self.config.name)
        # Look up the neighbours before removing anything
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated to the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup
            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s\n"
                         "Please manually remove the '%s' directory",
                         backup.backup_id, self.config.name, e,
                         backup.get_basebackup_directory())
            return
        self.backup_cache_remove(backup)
        output.info("Done")
Example #4
0
def backup(args):
    """
    Run a full backup on every requested server

    Unknown and disabled servers are reported as errors and skipped.
    Command line options that are set override the matching server
    configuration values before the backup starts.
    """
    # Command line option -> server configuration setting it overrides
    # (applied only when the option is not None)
    overrides = (
        ('reuse_backup', 'reuse_backup'),
        ('retry_sleep', 'basebackup_retry_sleep'),
        ('retry_times', 'basebackup_retry_times'),
    )
    servers = get_server_list(args, skip_disabled=True)
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # A disabled server cannot be backed up: report it and move on
        if not server.config.active:
            output.error(
                "Server '%s' is disabled.\n"
                "HINT: remove 'active=False' from server configuration "
                "to enable it.",
                name)
            continue
        for option, setting in overrides:
            value = getattr(args, option)
            if value is not None:
                setattr(server.config, setting, value)
        # This option is copied whenever it is present, whatever its value
        if hasattr(args, 'immediate_checkpoint'):
            server.config.immediate_checkpoint = args.immediate_checkpoint
        server.backup()
    output.close_and_exit()
Example #5
0
def get_server(args, skip_inactive=True, skip_disabled=False,
               on_error_stop=True, suppress_error=False):
    """
    Return the single server named by args.server_name
    (wraps get_server_list())

    Returns a Server object or None if the required server is unknown and
    on_error_stop is False.

    WARNING: this function modifies the 'args' parameter, replacing
    args.server_name with a one element list

    :param args: an argparse namespace containing a single
        server_name parameter
        WARNING: the function modifies the content of this parameter
    :param bool skip_inactive: skip inactive servers when 'all' is required
    :param bool skip_disabled: skip disabled servers when 'all' is required
    :param bool on_error_stop: stop if an error is found
    :param bool suppress_error: suppress display of errors (e.g. diagnose)
    :rtype: barman.server.Server|None
    """
    # Single-server context: exactly one name is expected
    requested = args.server_name
    assert isinstance(requested, str)

    # 'all' makes no sense when a single server is required
    if requested == 'all':
        output.error("You cannot use 'all' in a single server context")
        output.close_and_exit()
        # Not reached: kept only to make the control flow explicit
        return None

    # get_server_list() works on a list of names
    args.server_name = [requested]
    found = get_server_list(args, skip_inactive, skip_disabled,
                            on_error_stop, suppress_error)

    # An empty result means the server has been filtered out
    if len(found) == 0:
        output.close_and_exit()
        # Not reached: kept only to make the control flow explicit
        return None

    server = found[requested]

    # Run the standard validation (it displays its own error messages);
    # a failed validation is fatal only when on_error_stop is set
    server_is_usable = manage_server_command(server, requested)
    if on_error_stop and not server_is_usable:
        output.close_and_exit()
        # Not reached: kept only to make the control flow explicit
        return None

    return server
    def setup(self, backup_info, remote_command, dest):
        """
        Build the recovery_info dictionary used during the recovery and
        create the temporary working directory

        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str remote_command: ssh command for remote connection
        :param str dest: destination directory of the recovery
        :return dict: recovery_info dictionary, holding the basic values for a
            recovery
        """
        # Results of the recovery, used for output generation
        results = {
            'changes': [],
            'warnings': [],
            'delete_barman_xlog': False,
            'get_wal': False,
        }
        recovery_info = {
            'cmd': None,
            'recovery_dest': 'local',
            'rsync': None,
            'configuration_files': [],
            'destination_path': dest,
            'temporary_configuration_files': [],
            'tempdir': tempfile.mkdtemp(prefix='barman_recovery-'),
            'is_pitr': False,
            'wal_dest': os.path.join(dest, 'pg_xlog'),
            'get_wal': RecoveryOptions.GET_WAL in self.config.recovery_options,
        }
        recovery_info['results'] = results

        # Configuration files to manage: postgresql.auto.conf is added
        # only when the backup version is at least 90400
        configuration_files = recovery_info['configuration_files']
        configuration_files.append('postgresql.conf')
        if backup_info.version >= 90400:
            configuration_files.append('postgresql.auto.conf')

        # Local recovery: a local command object is all that is needed
        if not remote_command:
            recovery_info['cmd'] = UnixLocalCommand()
            return recovery_info

        # Remote recovery: prepare rsync and the remote command object
        recovery_info['recovery_dest'] = 'remote'
        recovery_info['rsync'] = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        try:
            recovery_info['cmd'] = UnixRemoteCommand(remote_command)
        except FsOperationFailed:
            output.error(
                "Unable to connect to the target host using the command "
                "'%s'", remote_command)
            output.close_and_exit()

        return recovery_info
Example #7
0
    def backup(self):
        """
        Performs a backup for the server

        A BackupInfo object is created and saved, the pre-backup hook
        scripts are run, then the backup is delegated to the executor.
        On success the backup is marked as DONE. On any failure
        (including KeyboardInterrupt) the backup, if already created, is
        marked as FAILED and the error is reported through output.error.
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = BackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info(
                "Starting backup for server %s in %s",
                self.config.name,
                backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(
                self, 'backup_retry_script', 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as DONE
            backup_info.set_attribute("status", "DONE")
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g.: CTRL-C)
        except BaseException as e:
            msg_lines = str(e).strip().splitlines()
            # If the exception has no attached message use the raw type name.
            # This must happen even when backup_info has not been created,
            # because msg_lines[0] is also used by the error output below.
            if not msg_lines:
                msg_lines = [type(e).__name__]
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", "FAILED")
                backup_info.set_attribute(
                    "error",
                    "failure %s (%s)" % (
                        self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s\n%s",
                         self.executor.current_action, msg_lines[0],
                         '\n'.join(msg_lines[1:]))
Example #8
0
    def remove_wal_before_backup(self, backup_info, timelines_to_protect=None):
        """
        Remove WAL files which have been archived before the start of
        the provided backup.

        If no backup_info is provided delete all available WAL files

        If timelines_to_protect list is passed, never remove a wal in one of
        these timelines.

        The xlogdb is rewritten while it is scanned: the entries that are
        kept are copied into a "<xlogdb name>.new" file, which is moved over
        the xlogdb once the scan is complete. History files are always kept.
        Entries whose name is not a valid xlog file name are reported as
        errors; they are neither copied into the new xlogdb nor passed to
        delete_wal().

        :param BackupInfo|None backup_info: the backup information structure
        :param set timelines_to_protect: optional list of timelines
            to protect
        :return list: a list of removed WAL files
        """
        removed = []
        with self.server.xlogdb() as fxlogdb:
            # The new content is written beside the current xlogdb file
            xlogdb_new = fxlogdb.name + ".new"
            with open(xlogdb_new, 'w') as fxlogdb_new:
                for line in fxlogdb:
                    wal_info = WalFileInfo.from_xlogdb_line(line)
                    if not xlog.is_any_xlog_file(wal_info.name):
                        output.error(
                            "invalid xlog segment name %r\n"
                            "HINT: Please run \"barman rebuild-xlogdb %s\" "
                            "to solve this issue",
                            wal_info.name, self.config.name)
                        # The invalid entry is skipped: it is not written
                        # to the new xlogdb and no file is deleted for it
                        continue

                    # Keeps the WAL segment if it is a history file
                    keep = xlog.is_history_file(wal_info.name)

                    # Keeps the WAL segment if its timeline is in
                    # `timelines_to_protect`
                    if timelines_to_protect:
                        tli, _, _ = xlog.decode_segment_name(wal_info.name)
                        keep |= tli in timelines_to_protect

                    # Keeps the WAL segment if it is a newer
                    # than the given backup (the first available)
                    # NOTE: this is a plain comparison between the WAL name
                    # and backup_info.begin_wal
                    if backup_info:
                        keep |= wal_info.name >= backup_info.begin_wal

                    # If the file has to be kept write it in the new xlogdb
                    # otherwise delete it  and record it in the removed list
                    if keep:
                        fxlogdb_new.write(wal_info.to_xlogdb_line())
                    else:
                        self.delete_wal(wal_info)
                        removed.append(wal_info.name)
                # Push the new content to disk before it replaces the xlogdb
                fxlogdb_new.flush()
                os.fsync(fxlogdb_new.fileno())
            # Replace the xlogdb with the new file, then fsync the
            # directory that contains it
            shutil.move(xlogdb_new, fxlogdb.name)
            fsync_dir(os.path.dirname(fxlogdb.name))
        return removed
Example #9
0
 def backup(self):
     """
     Performs a backup for the server

     Only the directory preparation step is visible here: an OSError
     raised by _make_directories() is reported through output.error
     and the method returns.
     """
     try:
         # check required backup directories exist
         self._make_directories()
     # "except ... as" is valid on both Python 2.6+ and Python 3, while
     # the old "except OSError, e" form is a syntax error on Python 3
     except OSError as e:
         output.error('failed to create %s directory: %s',
                      e.filename, e.strerror)
         return
Example #10
0
def main():
    """
    Entry point of the Barman command line: builds the argument parser,
    registers the available commands and dispatches the requested one
    """
    banner = 'Barman by 2ndQuadrant (www.2ndQuadrant.com)'
    default_config_files = ', '.join(barman.config.Config.CONFIG_FILES)
    commands = [
        archive_wal,
        backup,
        check,
        cron,
        delete,
        diagnose,
        get_wal,
        list_backup,
        list_files,
        list_server,
        rebuild_xlogdb,
        receive_wal,
        recover,
        show_backup,
        show_server,
        replication_status,
        status,
        switch_xlog,
    ]

    p = ArghParser(epilog=banner)
    # Global options
    p.add_argument('-v', '--version', action='version',
                   version='%s\n\n%s' % (barman.__version__, banner))
    p.add_argument('-c', '--config',
                   help='uses a configuration file (defaults: %s)'
                        % default_config_files,
                   default=SUPPRESS)
    p.add_argument('-q', '--quiet', help='be quiet', action='store_true')
    p.add_argument('-d', '--debug', help='debug output', action='store_true')
    p.add_argument('-f', '--format', help='output format',
                   choices=output.AVAILABLE_WRITERS.keys(),
                   default=output.DEFAULT_WRITER)
    p.add_commands(commands)

    # Run the command: global_config is passed as the pre_call hook.
    # Any failure is reported through the output API instead of
    # propagating to the caller.
    # noinspection PyBroadException
    try:
        p.dispatch(pre_call=global_config)
    except KeyboardInterrupt:
        output.error("Process interrupted by user (KeyboardInterrupt)")
    except Exception as e:
        output.exception("%s\nSee log file for more details." % e)

    # cleanup output API and exit honoring output.error_occurred and
    # output.error_exit_code
    output.close_and_exit()
Example #11
0
def rebuild_xlogdb(args):
    """
    Rebuild the WAL file database of every requested server,
    guessing it from the disk content.
    """
    servers = get_server_list(args)
    for name in sorted(servers):
        server = servers[name]
        # A None entry means that the name is not a known server
        if server is not None:
            server.rebuild_xlogdb()
        else:
            output.error("Unknown server '%s'", name)
    output.close_and_exit()
Example #12
0
File: cli.py Project: ozbek/barman
def delete(args):
    """
    Delete the backup selected by the command line arguments
    """
    server = get_server(args)

    # Resolve the backup requested on the command line
    target = parse_backup_id(server, args)
    # closing() guarantees that server.close() is called at the end
    with closing(server):
        deleted = server.delete_backup(target)
        if not deleted:
            output.error("Cannot delete backup (%s %s)" %
                         (server.config.name, target))
    output.close_and_exit()
Example #13
0
def list_backup(args):
    """
    List the backups available for each requested server (supports 'all')
    """
    servers = get_server_list(args)
    for name in sorted(servers):
        server = servers[name]
        # The output is initialised before the server is validated
        output.init('list_backup', name, minimal=args.minimal)
        if server is not None:
            server.list_backups()
        else:
            output.error("Unknown server '%s'" % name)
    output.close_and_exit()
Example #14
0
def show_server(args):
    """
    Display the configuration parameters of each requested server
    """
    servers = get_server_list(args)
    for name in sorted(servers):
        server = servers[name]
        # A None entry means that the name is not a known server
        if server is not None:
            output.init('show_server', name)
            server.show()
        else:
            output.error("Unknown server '%s'" % name)
    output.close_and_exit()
Example #15
0
def status(args):
    """
    Display live information and status of the PostgreSQL server
    for each requested server
    """
    servers = get_server_list(args)
    for name in sorted(servers):
        server = servers[name]
        # A None entry means that the name is not a known server
        if server is not None:
            output.init('status', name)
            server.status()
        else:
            output.error("Unknown server '%s'" % name)
    output.close_and_exit()
Example #16
0
    def _prepare_tablespaces(self, backup_info, cmd, dest, tablespaces):
        """
        Prepare the directory structure for required tablespaces,
        taking care of tablespaces relocation, if requested.

        :param barman.infofile.BackupInfo backup_info: backup representation
        :param barman.fs.UnixLocalCommand cmd: Object for
            filesystem interaction
        :param str dest: destination dir for the recovery
        :param dict tablespaces: dict of all the tablespaces and their location
        """
        tblspc_dir = os.path.join(dest, 'pg_tblspc')
        try:
            # check for pg_tblspc dir into recovery destination folder.
            # if it does not exists, create it
            cmd.create_dir_if_not_exists(tblspc_dir)
        except FsOperationFailed as e:
            output.error(
                "unable to initialise tablespace directory "
                "'%s': %s", tblspc_dir, e)
            output.close_and_exit()
        for item in backup_info.tablespaces:

            # build the filename of the link under pg_tblspc directory
            pg_tblspc_file = os.path.join(tblspc_dir, str(item.oid))

            # by default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = item.location

            # if a relocation has been requested for this tablespace,
            # use the target directory provided by the user
            if tablespaces and item.name in tablespaces:
                location = tablespaces[item.name]

            try:
                # remove the current link in pg_tblspc, if it exists
                cmd.delete_if_exists(pg_tblspc_file)
                # create tablespace location, if does not exist
                # (raise an exception if it is not possible)
                cmd.create_dir_if_not_exists(location)
                # check for write permissions on destination directory
                cmd.check_write_permission(location)
                # create symlink between tablespace and recovery folder
                cmd.create_symbolic_link(location, pg_tblspc_file)
            except FsOperationFailed as e:
                output.error(
                    "unable to prepare '%s' tablespace "
                    "(destination '%s'): %s", item.name, location, e)
                output.close_and_exit()
            output.info("\t%s, %s, %s", item.oid, item.name, location)
Example #17
0
def exec_diagnose(servers, errors_list):
    """
    Diagnostic command: gathers information from backup server
    and from all the configured servers.

    Gathered information should be used for support and problems detection

    The collected data is serialised as JSON (using BarmanEncoder) and
    emitted through output.info.

    :param dict(str,barman.server.Server) servers: list of configured servers
    :param list errors_list: list of global errors
    """
    # global section. info about barman server
    diagnosis = {'global': {}, 'servers': {}}
    # barman global config
    diagnosis['global']['config'] = dict(barman.__config__._global_config)
    diagnosis['global']['config']['errors_list'] = errors_list
    command = fs.UnixLocalCommand()
    # basic system info
    diagnosis['global']['system_info'] = command.get_system_info()
    diagnosis['global']['system_info']['barman_ver'] = barman.__version__
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis['servers'][name] = {}
        # NOTE(review): vars() returns the live attribute dictionary of
        # server.config, so the 'del' below also removes the 'config'
        # attribute from that object - confirm this is intended
        diagnosis['servers'][name]['config'] = vars(server.config)
        del diagnosis['servers'][name]['config']['config']
        # server system info
        if server.config.ssh_command:
            try:
                command = fs.UnixRemoteCommand(
                    ssh_command=server.config.ssh_command, path=server.path)
                diagnosis['servers'][name]['system_info'] = (
                    command.get_system_info())
            except FsOperationFailed:
                # Best effort: the remote system information is simply
                # omitted when it cannot be collected
                pass
        # barman status information for the server
        diagnosis['servers'][name]['status'] = server.get_remote_status()
        # backup list
        backups = server.get_available_backups(BackupInfo.STATUS_ALL)
        diagnosis['servers'][name]['backups'] = backups
        # Release any PostgreSQL resource
        server.close()
    # json.dumps() returns a string and takes no file argument: a stray
    # positional sys.stdout would be read as 'skipkeys' on Python 2 and
    # raises TypeError on Python 3, so it must not be passed
    output.info(
        json.dumps(diagnosis,
                   cls=BarmanEncoder,
                   indent=4,
                   sort_keys=True))
    def _prepare_tablespaces(self, backup_info, cmd, dest, tablespaces):
        """
        Prepare the directory structure for required tablespaces,
        taking care of tablespaces relocation, if requested.

        :param barman.infofile.BackupInfo backup_info: backup representation
        :param barman.fs.UnixLocalCommand cmd: Object for
            filesystem interaction
        :param str dest: destination dir for the recovery
        :param dict tablespaces: dict of all the tablespaces and their location
        """
        tblspc_dir = os.path.join(dest, 'pg_tblspc')
        try:
            # check for pg_tblspc dir into recovery destination folder.
            # if it does not exists, create it
            cmd.create_dir_if_not_exists(tblspc_dir)
        except FsOperationFailed as e:
            output.error("unable to initialise tablespace directory "
                         "'%s': %s", tblspc_dir, e)
            output.close_and_exit()
        for item in backup_info.tablespaces:

            # build the filename of the link under pg_tblspc directory
            pg_tblspc_file = os.path.join(tblspc_dir, str(item.oid))

            # by default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = item.location

            # if a relocation has been requested for this tablespace,
            # use the target directory provided by the user
            if tablespaces and item.name in tablespaces:
                location = tablespaces[item.name]

            try:
                # remove the current link in pg_tblspc, if it exists
                # (raise an exception if it is a directory)
                cmd.delete_if_exists(pg_tblspc_file)
                # create tablespace location, if does not exist
                # (raise an exception if it is not possible)
                cmd.create_dir_if_not_exists(location)
                # check for write permissions on destination directory
                cmd.check_write_permission(location)
                # create symlink between tablespace and recovery folder
                cmd.create_symbolic_link(location, pg_tblspc_file)
            except FsOperationFailed as e:
                output.error("unable to prepare '%s' tablespace "
                             "(destination '%s'): %s",
                             item.name, location, e)
                output.close_and_exit()
            output.info("\t%s, %s, %s", item.oid, item.name, location)
Example #19
0
    def kill(self, process_info, retries=10):
        """
        Send SIGINT to a process and wait for its termination

        Returns True if killed successfully False otherwise

        :param ProcessInfo process_info: representation of the process
            we want to kill
        :param int retries: number of times the method will check
            if the process is still alive
        :rtype: bool
        """
        pid = process_info.pid

        # Deliver the termination signal
        try:
            _logger.debug("Sending SIGINT to PID %s", pid)
            os.kill(pid, signal.SIGINT)
            _logger.debug("os.kill call succeeded")
        except OSError as e:
            _logger.debug("os.kill call failed: %s", e)
            if e.errno != errno.ESRCH:
                # Something unexpected has happened
                output.error("%s", e)
                return False
            # ESRCH: no such process, it has probably just terminated
            return True

        # Poll the process, once per second: os.kill raising ESRCH means
        # that the process is gone, while a successful call means that
        # it is still alive
        for _ in range(retries):
            try:
                _logger.debug("Checking with SIG_DFL if PID %s is still alive",
                              pid)
                os.kill(pid, signal.SIG_DFL)
                _logger.debug("os.kill call succeeded")
            except OSError as e:
                _logger.debug("os.kill call failed: %s", e)
                if e.errno != errno.ESRCH:
                    # Something unexpected has happened
                    output.error("%s", e)
                    return False
                # ESRCH: the process doesn't exist anymore, we are done
                return True
            time.sleep(1)

        # Still alive after all the checks
        _logger.debug(
            "The PID %s has not been terminated after %s retries",
            pid,
            retries,
        )
        return False
Example #20
0
def exec_diagnose(servers, errors_list):
    """
    Diagnostic command: gathers information from backup server
    and from all the configured servers.

    Gathered information should be used for support and problems detection

    The collected data is serialised as JSON (using BarmanEncoder) and
    emitted through output.info.

    :param dict(str,barman.server.Server) servers: list of configured servers
    :param list errors_list: list of global errors
    """
    # global section. info about barman server
    diagnosis = {}
    diagnosis['global'] = {}
    diagnosis['servers'] = {}
    # barman global config
    diagnosis['global']['config'] = dict(barman.__config__._global_config)
    diagnosis['global']['config']['errors_list'] = errors_list
    command = fs.UnixLocalCommand()
    # basic system info
    diagnosis['global']['system_info'] = command.get_system_info()
    diagnosis['global']['system_info']['barman_ver'] = barman.__version__
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis['servers'][name] = {}
        # NOTE(review): vars() returns the live attribute dictionary of
        # server.config, so the 'del' below also removes the 'config'
        # attribute from that object - confirm this is intended
        diagnosis['servers'][name]['config'] = vars(server.config)
        del diagnosis['servers'][name]['config']['config']
        # server system info
        if server.config.ssh_command:
            try:
                command = fs.UnixRemoteCommand(
                    ssh_command=server.config.ssh_command)
                diagnosis['servers'][name]['system_info'] = (
                    command.get_system_info())
            except FsOperationFailed:
                # Best effort: the remote system information is simply
                # omitted when it cannot be collected
                pass
        # barman status information for the server
        diagnosis['servers'][name]['status'] = server.get_remote_status()
        # backup list
        backups = server.get_available_backups(BackupInfo.STATUS_ALL)
        diagnosis['servers'][name]['backups'] = backups
        # Release any PostgreSQL resource
        server.close()
    # json.dumps() returns a string and takes no file argument: a stray
    # positional sys.stdout would be read as 'skipkeys' on Python 2 and
    # raises TypeError on Python 3, so it must not be passed
    output.info(json.dumps(diagnosis, cls=BarmanEncoder, indent=4,
                           sort_keys=True))
Example #21
0
def cron():
    """
    Run maintenance tasks

    The maintenance of every active server is executed while holding the
    global cron lock. If the lock is busy an informational message is
    emitted; if the lock cannot be accessed because of missing
    permissions an error is reported.
    """
    try:
        with lockfile.GlobalCronLock(barman.__config__.barman_lock_directory):
            servers = [Server(conf) for conf in barman.__config__.servers()
                       if conf.active]
            for server in servers:
                server.cron()
    except lockfile.LockFileBusy:
        output.info("Another cron is running")

    # "except ... as" is valid on both Python 2.6+ and Python 3, while the
    # old "except X, e" form is a syntax error on Python 3
    except lockfile.LockFilePermissionDenied as e:
        output.error("Permission denied, unable to access '%s'", e)
Example #22
0
def show_backup(args):
    """
    Show the information of a single backup

    An error is reported when either the server or the backup cannot be
    found. In every case the output is closed and the program exits.

    :param args: command line arguments namespace; ``server_name`` and
        ``backup_id`` are read here
    """
    selected_server = get_server(args)
    if selected_server is not None:
        # The server exists: look up the requested backup
        selected_backup = parse_backup_id(selected_server, args)
        if selected_backup is not None:
            selected_server.show_backup(selected_backup)
        else:
            output.error("Unknown backup '%s' for server '%s'" % (
                args.backup_id, args.server_name))
    else:
        output.error("Unknown server '%s'" % args.server_name)
    output.close_and_exit()
Example #23
0
def receive_wal(args):
    """
    Start or stop a receive-wal process.

    The receive-wal process uses the streaming protocol to receive WAL
    files from the PostgreSQL server.

    :param args: command line arguments namespace; the ``stop`` and
        ``reset`` flags are read here
    """
    server = get_server(args)
    if not args.stop:
        # No shutdown requested: attempt to start the process
        server.receive_wal(reset=args.reset)
    elif args.reset:
        # Both flags have been provided at the same time
        output.error("--stop and --reset options are not compatible")
    else:
        # The caller requested the shutdown of the receive-wal process:
        # deliver the termination signal
        server.kill('receive-wal')
    output.close_and_exit()
Example #24
0
def delete(args):
    """
    Delete a backup

    Reports an error and closes the output when the server or the backup
    is unknown; otherwise asks the server to delete the backup.

    :param args: command line arguments namespace; ``server_name`` and
        ``backup_id`` are read here
    """
    target_server = get_server(args)
    if target_server is None:
        output.error("Unknown server '%s'", args.server_name)
        output.close_and_exit()
    # Resolve the backup to be removed
    target_backup = parse_backup_id(target_server, args)
    if target_backup is None:
        output.error("Unknown backup '%s' for server '%s'", args.backup_id,
                     args.server_name)
        output.close_and_exit()
    target_server.delete_backup(target_backup)
    output.close_and_exit()
Example #25
0
    def kill(self, process_info, retries=10):
        """
        Kill a process

        Sends SIGINT to the process and then polls it, once per second and
        for at most ``retries`` times, until it disappears.

        Returns True if killed successfully False otherwise

        :param ProcessInfo process_info: representation of the process
            we want to kill
        :param int retries: number of times the method will check
            if the process is still alive
        :rtype: bool
        """
        # Try to kill the process
        try:
            _logger.debug("Sending SIGINT to PID %s", process_info.pid)
            os.kill(process_info.pid, signal.SIGINT)
            _logger.debug("os.kill call succeeded")
        except OSError as e:
            _logger.debug("os.kill call failed: %s", e)
            # The process doesn't exists. It has probably just terminated.
            if e.errno == errno.ESRCH:
                return True
            # Something unexpected has happened
            output.error("%s", e)
            return False
        # Check if the process have been killed. the fastest (and maybe safest)
        # way is to send a kill with 0 as signal.
        # If the method returns an OSError exceptions, the process have been
        # killed successfully, otherwise is still alive.
        # NOTE: range() is used instead of the Python 2 only xrange(), which
        # raises NameError on Python 3; the loop is short, so building a list
        # on Python 2 is irrelevant.
        for _ in range(retries):
            try:
                _logger.debug("Sending SIG_DFL to PID %s",
                              process_info.pid)
                os.kill(process_info.pid, signal.SIG_DFL)
                _logger.debug("os.kill call succeeded")
            except OSError as e:
                _logger.debug("os.kill call failed: %s", e)
                # If the process doesn't exists, we are done.
                if e.errno == errno.ESRCH:
                    return True
                # Something unexpected has happened
                output.error("%s", e)
                return False
            # Still alive: wait a little before probing again
            time.sleep(1)
        _logger.debug("The PID %s has not been terminated after %s retries",
                      process_info.pid, retries)
        return False
Example #26
0
    def _reset_streaming_status(self, postgres_status, streaming_status):
        """
        Reset the status of receive-wal by removing the .partial file that
        is marking the current position and creating one that is current with
        the PostgreSQL insert location

        :param dict postgres_status: status map; the keys 'current_lsn',
            'xlog_segment_size', 'replication_slot' and 'current_xlog'
            are read here
        :param dict streaming_status: status map; the 'timeline' key is
            read here
        """
        current_wal = xlog.location_to_xlogfile_name_offset(
            postgres_status['current_lsn'], streaming_status['timeline'],
            postgres_status['xlog_segment_size'])['file_name']
        # When a replication slot with a restart LSN is available, the
        # restart position is derived from it instead of the current LSN
        restart_wal = current_wal
        if postgres_status['replication_slot'] and \
                postgres_status['replication_slot'].restart_lsn:
            restart_wal = xlog.location_to_xlogfile_name_offset(
                postgres_status['replication_slot'].restart_lsn,
                streaming_status['timeline'],
                postgres_status['xlog_segment_size'])['file_name']
        restart_path = os.path.join(self.config.streaming_wals_directory,
                                    restart_wal)
        restart_partial_path = restart_path + '.partial'
        wal_files = sorted(glob(
            os.path.join(self.config.streaming_wals_directory, '*')),
                           reverse=True)

        # Pick the newer file. The list is sorted in reverse order, so the
        # first WAL or partial file found is the newest one.
        # A separate loop variable is used so that 'last' stays None when
        # the directory contains no WAL or partial file at all (iterating
        # directly on 'last' would leave it bound to an unrelated file).
        last = None
        for candidate in wal_files:
            if xlog.is_wal_file(candidate) or \
                    xlog.is_partial_file(candidate):
                last = candidate
                break

        # Check if the status is already up-to-date
        if not last or last == restart_partial_path or last == restart_path:
            output.info("Nothing to do. Position of receive-wal is aligned.")
            return

        if os.path.basename(last) > current_wal:
            output.error(
                "The receive-wal position is ahead of PostgreSQL "
                "current WAL lsn (%s > %s)", os.path.basename(last),
                postgres_status['current_xlog'])
            return

        output.info("Resetting receive-wal directory status")
        if xlog.is_partial_file(last):
            output.info("Removing status file %s" % last)
            os.unlink(last)
        output.info("Creating status file %s" % restart_partial_path)
        # Create (or truncate) the empty marker file
        open(restart_partial_path, 'w').close()
Example #27
0
def list_files(args):
    """
    Print the list of files belonging to a single backup

    :param args: command line arguments namespace; ``target`` is read
        here, the rest is consumed by get_server and parse_backup_id
    """
    server = get_server(args)

    # Resolve the requested backup
    selected_backup = parse_backup_id(server, args)
    try:
        file_lines = selected_backup.get_list_of_files(args.target)
        for file_line in file_lines:
            # Plain listing: not replicated in the log
            output.info(file_line, log=False)
    except BadXlogSegmentName as e:
        hint = ("invalid xlog segment name %r\n"
                "HINT: Please run \"barman rebuild-xlogdb %s\" "
                "to solve this issue")
        output.error(hint, str(e), server.config.name)
        output.close_and_exit()
Example #28
0
def receive_wal(args):
    """
    Start or stop a receive-wal process.

    The receive-wal process uses the streaming protocol to receive WAL
    files from the PostgreSQL server. When started, the server object is
    closed once the call returns.

    :param args: command line arguments namespace; the ``stop`` and
        ``reset`` flags are read here
    """
    server = get_server(args)
    if not args.stop:
        # No shutdown requested: attempt to start the process
        with closing(server):
            server.receive_wal(reset=args.reset)
    elif args.reset:
        # Both flags have been provided at the same time
        output.error("--stop and --reset options are not compatible")
    else:
        # The caller requested the shutdown of the receive-wal process:
        # deliver the termination signal
        server.kill('receive-wal')
    output.close_and_exit()
Example #29
0
def cron():
    """
    Run the maintenance tasks of every configured server

    The whole run is protected by the '.cron.lock' file placed in the
    Barman home directory.
    """
    lock_path = os.path.join(barman.__config__.barman_home, '.cron.lock')
    try:
        with lockfile.LockFile(lock_path, raise_if_fail=True):
            # Instantiate every server before running any maintenance
            all_servers = list(map(Server, barman.__config__.servers()))
            for current in all_servers:
                current.cron()
    except lockfile.LockFileBusy:
        output.info("Another cron is running")

    except lockfile.LockFilePermissionDenied:
        output.error("Permission denied, unable to access '%s'",
                     lock_path)
    output.close_and_exit()
Example #30
0
def list_files(args):
    """
    Print the list of files belonging to a single backup

    Reports an error and closes the output when the server or the backup
    is unknown.

    :param args: command line arguments namespace; ``server_name``,
        ``backup_id`` and ``target`` are read here
    """
    selected_server = get_server(args)
    if selected_server is None:
        output.error("Unknown server '%s'", args.server_name)
        output.close_and_exit()
    # Resolve the requested backup
    selected_backup = parse_backup_id(selected_server, args)
    if selected_backup is None:
        output.error("Unknown backup '%s' for server '%s'", args.backup_id,
                     args.server_name)
        output.close_and_exit()
    file_lines = selected_backup.get_list_of_files(args.target)
    for file_line in file_lines:
        # Plain listing: not replicated in the log
        output.info(file_line, log=False)
    output.close_and_exit()
Example #31
0
def check(args):
    """
    Run the checks of every requested server, in name order.

    When the ``nagios`` flag is set, the Nagios output writer is
    installed before running the checks. Unknown servers are reported
    as errors and skipped.

    :param args: command line arguments namespace
    """
    if args.nagios:
        output.set_output_writer(output.NagiosOutputWriter())
    server_map = get_server_list(args)
    for server_name in sorted(server_map):
        current = server_map[server_name]
        if current is not None:
            output.init('check', server_name)
            current.check()
        else:
            output.error("Unknown server '%s'" % server_name)
    output.close_and_exit()
Example #32
0
    def cron(self, verbose=True, wals=True, retention_policies=True):
        """
        Maintenance operations

        Both operations run while holding the per-server cron lock, which
        is built from the configured lock directory and the server name.

        :param bool verbose: report even if no actions
        :param bool wals: WAL archive maintenance
        :param bool retention_policies: retention policy maintenance
        """
        try:
            with ServerCronLock(self.config.barman_lock_directory,
                                self.config.name):
                # Standard maintenance (WAL archive)
                if wals:
                    self.backup_manager.cron(verbose=verbose)
                # Retention policy management
                if retention_policies:
                    self.backup_manager.cron_retention_policy()
        except LockFilePermissionDenied as e:
            # NOTE: "except X as e" works on Python 2.6+ and Python 3, whereas
            # the legacy "except X, e" spelling is a syntax error on Python 3.
            # The exception is passed as a format argument, so a '%' in its
            # text cannot be mistaken for a format directive.
            output.error("Permission denied, unable to access '%s'", e)
Example #33
0
def list_files(args):
    """
    Print the list of files belonging to a single backup

    :param args: command line arguments namespace; ``target`` is read
        here, the rest is consumed by get_server and parse_backup_id
    """
    server = get_server(args)

    # Resolve the requested backup (the returned object exposes
    # get_list_of_files)
    backup_info = parse_backup_id(server, args)
    try:
        file_lines = backup_info.get_list_of_files(args.target)
        for file_line in file_lines:
            # Plain listing: not replicated in the log
            output.info(file_line, log=False)
    except BadXlogSegmentName as e:
        hint = ("invalid xlog segment name %r\n"
                "HINT: Please run \"barman rebuild-xlogdb %s\" "
                "to solve this issue")
        output.error(hint, str(e), server.config.name)
        output.close_and_exit()
Example #34
0
    def test_error(self, caplog):
        """
        output.error must log the message, forward it to the writer and
        raise the global error flag
        """
        # set up a mocked output writer
        mocked_writer = self._mock_writer()

        text = 'test message'
        output.error(text)

        # every captured record is an ERROR logged under this module name
        for entry in caplog.records():
            assert entry.levelname == 'ERROR'
            assert entry.name == __name__
        assert text in caplog.text()

        # the writer has been notified exactly once
        mocked_writer.error_occurred.assert_called_once_with()
        mocked_writer.error.assert_called_once_with(text)

        # the module-level error flag is set
        assert output.error_occurred
Example #35
0
def get_server(args, active_only=False):
    """
    Build the Server object for the server named on the command line

    :param args: an argparse namespace containing a single server_name
        parameter
    :param bool active_only: Exit with error if the server is disabled
    :return: a Server object, or None when the configuration has no
        such server
    """
    server_config = barman.__config__.get_server(args.server_name)
    if server_config:
        if active_only and not server_config.active:
            # A disabled server is not acceptable for this command
            output.error(
                "Server '%s' is disabled.\n"
                "HINT: remove 'active=False' from server configuration "
                "to enable it.",
                server_config.name)
            output.close_and_exit()
        return Server(server_config)
    return None
Example #36
0
def backup(args):
    """
    Take a full backup of every requested server, in name order

    Unknown servers are reported as errors and skipped.

    :param args: command line arguments namespace; ``retry_sleep``,
        ``retry_times`` and (when present) ``immediate_checkpoint``
        override the corresponding server configuration values
    """
    server_map = get_server_list(args)
    for server_name in sorted(server_map):
        server = server_map[server_name]
        if server is None:
            output.error("Unknown server '%s'" % server_name)
            continue
        # Command line overrides of the base backup retry settings
        for option in ('retry_sleep', 'retry_times'):
            override = getattr(args, option)
            if override is not None:
                setattr(server.config, 'basebackup_' + option, override)
        if hasattr(args, 'immediate_checkpoint'):
            server.config.immediate_checkpoint = args.immediate_checkpoint
        server.backup()

    output.close_and_exit()
Example #37
0
    def test_error(self, caplog):
        """
        output.error must log the message, forward it to the writer and
        raise the global error flag
        """
        # set up a mocked output writer
        mocked_writer = self._mock_writer()

        text = 'test message'
        output.error(text)

        # every captured record is an ERROR logged under this module name
        for entry in caplog.records:
            assert entry.levelname == 'ERROR'
            assert entry.name == __name__
        assert text in caplog.text

        # the writer has been notified exactly once
        mocked_writer.error_occurred.assert_called_once_with()
        mocked_writer.error.assert_called_once_with(text)

        # the module-level error flag is set
        assert output.error_occurred
Example #38
0
    def test_error_with_ignore(self, caplog):
        """
        With ignore=True, output.error must still log and forward the
        message, but must not raise the global error flag
        """
        # set up a mocked output writer
        mocked_writer = self._mock_writer()

        template = 'test format %02d %s'
        values = (1, '2nd')
        output.error(template, ignore=True, *values)

        # every captured record is an ERROR logged under this module name
        for entry in caplog.records():
            assert entry.levelname == 'ERROR'
            assert entry.name == __name__
        assert template % values in caplog.text()

        # the writer receives the message but no error notification
        assert not mocked_writer.error_occurred.called
        mocked_writer.error.assert_called_once_with(template, *values)

        # the module-level error flag stays unset
        assert not output.error_occurred
Example #39
0
    def test_error_with_ignore(self, caplog):
        """
        With ignore=True, output.error must still log and forward the
        message, but must not raise the global error flag
        """
        # set up a mocked output writer
        mocked_writer = self._mock_writer()

        template = 'test format %02d %s'
        values = (1, '2nd')
        output.error(template, ignore=True, *values)

        # every captured record is an ERROR logged under this module name
        for entry in caplog.records:
            assert entry.levelname == 'ERROR'
            assert entry.name == __name__
        assert template % values in caplog.text

        # the writer receives the message but no error notification
        assert not mocked_writer.error_occurred.called
        mocked_writer.error.assert_called_once_with(template, *values)

        # the module-level error flag stays unset
        assert not output.error_occurred
Example #40
0
def sync_info(args):
    """
    Output the internal synchronisation status.
    Used to sync_backup with a passive node

    :param args: command line arguments namespace; ``last_wal``,
        ``last_position`` and the optional ``primary`` flag are read here
    """
    server = get_server(args)
    try:
        if not getattr(args, 'primary', False):
            server.sync_status(args.last_wal, args.last_position)
        else:
            # Called with the --primary option: dump the primary node
            # information as JSON, without logging it
            node_info = server.primary_node_info(args.last_wal,
                                                 args.last_position)
            serialized = json.dumps(node_info, cls=BarmanEncoder, indent=4)
            output.info(serialized, log=False)
    except SyncError as e:
        # Only the error message is reported for SyncError exceptions,
        # so that no stack trace ends up in the log
        output.error(e)

    output.close_and_exit()
Example #41
0
    def _detect_missing_keys(config_items, required_keys, section):
        """
        Check config for any missing required keys

        :param config_items: list of tuples containing provided parameters
            along with their values
        :param required_keys: list of required keys
        :param section: source section (for error reporting)
        """
        missing_key_detected = False

        config_keys = [item[0] for item in config_items]
        for req_key in required_keys:
            # if a required key is not found, then print an error
            if req_key not in config_keys:
                output.error(
                    'Parameter "%s" is required in [%s] section.' %
                    (req_key, section), )
                missing_key_detected = True
        if missing_key_detected:
            raise SystemExit(
                "Your configuration is missing required parameters. Exiting.")
Example #42
0
def parse_backup_id(server, args):
    """
    Resolve a backup ID, including the special words 'latest'/'last' and
    'oldest'/'first', into the corresponding backup.

    Exit with error if the backup id doesn't exist.

    :param Server server: server object to search for the required backup
    :param args: command line arguments namespace
    :rtype: BackupInfo
    """
    requested = args.backup_id
    # Any other value is taken as a literal backup ID
    resolved_id = requested
    if requested in ('latest', 'last'):
        resolved_id = server.get_last_backup_id()
    elif requested in ('oldest', 'first'):
        resolved_id = server.get_first_backup_id()

    found = server.get_backup(resolved_id)
    if found is not None:
        return found
    output.error("Unknown backup '%s' for server '%s'", args.backup_id,
                 server.config.name)
    output.close_and_exit()
    return found
Example #43
0
    def remove_wal_before_backup(self, backup_info):
        """
        Delete the WAL files that precede the beginning of the given
        backup, rewriting the xlogdb accordingly.

        If no backup_info is provided delete all available WAL files
        (history files are always kept).

        :param BackupInfo|None backup_info: the backup information structure
        :return list: a list of removed WAL files
        """
        deleted_names = []
        with self.server.xlogdb() as fxlogdb:
            # The surviving entries are written to a side file, which
            # replaces the xlogdb once it has been synced to disk
            new_db_path = fxlogdb.name + ".new"
            with open(new_db_path, 'w') as new_db:
                for db_line in fxlogdb:
                    wal = WalFileInfo.from_xlogdb_line(db_line)
                    if not xlog.is_any_xlog_file(wal.name):
                        # Report the bad entry and move on (it is not
                        # copied to the new xlogdb)
                        output.error(
                            "invalid xlog segment name %r\n"
                            "HINT: Please run \"barman rebuild-xlogdb %s\" "
                            "to solve this issue", wal.name,
                            self.config.name)
                        continue
                    # A WAL segment is kept if it is a history file or if
                    # it is not older than the first WAL of the backup
                    keep = (xlog.is_history_file(wal.name) or
                            (backup_info and
                             wal.name >= backup_info.begin_wal))
                    if not keep:
                        self.delete_wal(wal)
                        deleted_names.append(wal.name)
                    else:
                        new_db.write(wal.to_xlogdb_line())
                new_db.flush()
                os.fsync(new_db.fileno())
            shutil.move(new_db_path, fxlogdb.name)
            fsync_dir(os.path.dirname(fxlogdb.name))
        return deleted_names
Example #44
0
def unix_command_factory(remote_command=None, path=None):
    """
    Build the Unix command object matching the kind of target.

    Without a remote command a UnixLocalCommand is created; otherwise a
    UnixRemoteCommand is created, and a failure doing so is reported as
    an error before closing the output.

    :param remote_command: command used to reach the target host, if any
    :param path: passed through to UnixRemoteCommand
    :return: UnixLocalCommand
    """
    if not remote_command:
        local_cmd = UnixLocalCommand()
        logging.debug("Created a UnixLocalCommand")
        return local_cmd

    try:
        remote_cmd = UnixRemoteCommand(remote_command, path=path)
        logging.debug("Created a UnixRemoteCommand")
        return remote_cmd
    except FsOperationFailed:
        output.error(
            "Unable to connect to the target host using the command '%s'",
            remote_command,
        )
        output.close_and_exit()
Example #45
0
def manage_server_command(
    server,
    name=None,
    inactive_is_error=False,
    disabled_is_error=True,
    skip_inactive=True,
    skip_disabled=True,
):
    """
    Decide whether a server command must run on the given server,
    reporting server problems in a uniform way.

    With the default arguments, inactive and disabled servers are
    skipped and only disabled servers produce error messages.

    :param barman.server.Server server: server to be checked for errors
    :param str name: name of the server, in a multi-server command
    :param bool inactive_is_error: treat inactive server as error
    :param bool disabled_is_error: treat disabled server as error
    :param bool skip_inactive: skip if inactive
    :param bool skip_disabled: skip if disabled
    :return: True if the command has to be executed on this server
    :rtype: boolean
    """

    # An unknown server is always skipped
    if not server:
        output.error("Unknown server '%s'" % name)
        return False

    config = server.config

    # Inactive server: optionally an error, optionally skipped
    if not config.active:
        if inactive_is_error:
            output.error("Inactive server: %s" % server.config.name)
        if skip_inactive:
            return False

    # Anything that is not disabled can run the command
    if not config.disabled:
        return True

    # Disabled server: optionally emit every collected message as an error
    if disabled_is_error:
        for message in config.msg_list:
            output.error(message)
    return not skip_disabled
Example #46
0
    def recover(self,
                backup_info,
                dest,
                tablespaces=None,
                remote_command=None,
                target_tli=None,
                target_time=None,
                target_xid=None,
                target_name=None,
                target_immediate=False,
                exclusive=False,
                target_action=None,
                standby_mode=None):
        """
        Performs a recovery of a backup

        The steps are, in order: setup, PITR target definition, base
        backup copy, copy of the backup metadata, WAL segments copy
        (skipped when get-wal is in use), recovery.conf generation (only
        if needed), creation of the archive_status directory and analysis
        of the configuration files.

        This method should be called in a closing context

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool target_immediate: end recovery as soon as consistency
            is reached
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None target_action: The recovery target action
        :param bool|None standby_mode: standby mode
        :return dict: the recovery_info map built during the setup and
            enriched by the following steps
        :raises RecoveryStandbyModeException: if standby_mode is requested
            for a backup whose PostgreSQL version is older than 9.0
        """

        # NOTE(review): an older comment here mentioned running the cron to
        # keep the WAL catalog up to date, but no cron is invoked in this
        # method - confirm whether the caller (or _setup) takes care of it.
        # Prepare a map that contains all the objects required for a recovery
        recovery_info = self._setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)
        if remote_command:
            output.info("Remote command: %s", remote_command)

        # If the backup we are recovering is still not validated and we
        # haven't requested the get-wal feature, display a warning message
        if not recovery_info['get_wal']:
            if backup_info.status == BackupInfo.WAITING_FOR_WALS:
                output.warning(
                    "IMPORTANT: You have requested a recovery operation for "
                    "a backup that does not have yet all the WAL files that "
                    "are required for consistency.")

        # Set targets for PITR
        self._set_pitr_targets(recovery_info, backup_info, dest, target_name,
                               target_time, target_tli, target_xid,
                               target_immediate, target_action)

        # Retrieve the safe_horizon for smart copy
        self._retrieve_safe_horizon(recovery_info, backup_info, dest)

        # check destination directory. If doesn't exist create it
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.error(
                "unable to initialise destination directory "
                "'%s': %s", dest, e)
            output.close_and_exit()

        # Initialize tablespace directories
        if backup_info.tablespaces:
            self._prepare_tablespaces(backup_info, recovery_info['cmd'], dest,
                                      tablespaces)
        # Copy the base backup
        output.info("Copying the base backup.")
        try:
            self._backup_copy(backup_info, dest, tablespaces, remote_command,
                              recovery_info['safe_horizon'])
        except DataTransferFailure as e:
            output.error("Failure copying base backup: %s", e)
            output.close_and_exit()

        # Copy the backup.info file in the destination as
        # ".barman-recover.info"
        if remote_command:
            # Remote recovery: ship the file using rsync
            try:
                recovery_info['rsync'](backup_info.filename,
                                       ':%s/.barman-recover.info' % dest)
            except CommandFailedException as e:
                output.error('copy of recovery metadata file failed: %s', e)
                output.close_and_exit()
        else:
            # Local recovery: write the file directly in the destination
            backup_info.save(os.path.join(dest, '.barman-recover.info'))

        # Standby mode is not available for PostgreSQL older than 9.0
        # NOTE(review): this check runs after the base backup has already
        # been copied - confirm whether it should fail earlier.
        if backup_info.version < 90000 and standby_mode:
            raise RecoveryStandbyModeException(
                'standby_mode is available only from PostgreSQL 9.0')

        # Restore the WAL segments. If GET_WAL option is set, skip this phase
        # as they will be retrieved using the wal-get command.
        if not recovery_info['get_wal']:
            # If the backup we restored is still waiting for WALS, read the
            # backup info again and check whether it has been validated.
            # Notify the user if it is still not DONE.
            if backup_info.status == BackupInfo.WAITING_FOR_WALS:
                data = BackupInfo(self.server, backup_info.filename)
                if data.status == BackupInfo.WAITING_FOR_WALS:
                    output.warning(
                        "IMPORTANT: The backup we have recovered IS NOT "
                        "VALID. Required WAL files for consistency are "
                        "missing. Please verify that WAL archiving is "
                        "working correctly or evaluate using the 'get-wal' "
                        "option for recovery")

            output.info("Copying required WAL segments.")

            try:
                # Retrieve a list of required log files
                required_xlog_files = tuple(
                    self.server.get_required_xlog_files(
                        backup_info, target_tli,
                        recovery_info['target_epoch']))

                # Restore WAL segments into the wal_dest directory
                self._xlog_copy(required_xlog_files, recovery_info['wal_dest'],
                                remote_command)
            except DataTransferFailure as e:
                output.error("Failure copying WAL files: %s", e)
                output.close_and_exit()
            except BadXlogSegmentName as e:
                output.error(
                    "invalid xlog segment name %r\n"
                    "HINT: Please run \"barman rebuild-xlogdb %s\" "
                    "to solve this issue", force_str(e), self.config.name)
                output.close_and_exit()
            # If WAL files are put directly in the pg_xlog directory,
            # avoid shipping of just recovered files
            # by creating the corresponding archive status file
            if not recovery_info['is_pitr']:
                output.info("Generating archive status files")
                self._generate_archive_status(recovery_info, remote_command,
                                              required_xlog_files)

        # Generate recovery.conf file (only if needed by PITR, get_wal
        # or standby mode)
        is_pitr = recovery_info['is_pitr']
        get_wal = recovery_info['get_wal']
        if is_pitr or get_wal or standby_mode:
            output.info("Generating recovery.conf")
            self._generate_recovery_conf(recovery_info, backup_info, dest,
                                         target_immediate, exclusive,
                                         remote_command, target_name,
                                         target_time, target_tli, target_xid,
                                         standby_mode)

        # Create archive_status directory if necessary
        archive_status_dir = os.path.join(recovery_info['wal_dest'],
                                          'archive_status')
        try:
            recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir)
        except FsOperationFailed as e:
            output.error(
                "unable to create the archive_status directory "
                "'%s': %s", archive_status_dir, e)
            output.close_and_exit()

        # As last step, analyse configuration files in order to spot
        # harmful options. Barman performs automatic conversion of
        # some options as well as notifying users of their existence.
        #
        # This operation is performed in three steps:
        # 1) mapping
        # 2) analysis
        # 3) copy
        output.info("Identify dangerous settings in destination directory.")

        self._map_temporary_config_files(recovery_info, backup_info,
                                         remote_command)
        self._analyse_temporary_config_files(recovery_info)
        self._copy_temporary_config_files(dest, remote_command, recovery_info)

        return recovery_info
Example #47
0
    def _generate_recovery_conf(self, recovery_info, backup_info, dest,
                                exclusive, remote_command, target_name,
                                target_time, target_tli, target_xid):
        """
        Generate a recovery.conf file for PITR containing
        all the required configurations

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str dest: destination directory of the recovery
        :param boolean exclusive: exclusive backup or concurrent
        :param str remote_command: ssh command for remote connection
        :param str target_name: recovery target name for PITR
        :param str target_time: recovery target time for PITR
        :param str target_tli: recovery target timeline for PITR
        :param str target_xid: recovery target transaction id for PITR
        """
        if remote_command:
            recovery = open(
                os.path.join(recovery_info['tempdir'], 'recovery.conf'), 'w')
        else:
            recovery = open(os.path.join(dest, 'recovery.conf'), 'w')

        # If GET_WAL has been set, use the get-wal command to retrieve the
        # required wal files. Otherwise use the unix command "cp" to copy
        # them from the barman_xlog directory
        if recovery_info['get_wal']:
            # We need to create the right restore command.
            # If we are doing a remote recovery,
            # the barman-cli package is REQUIRED on the server that is hosting
            # the PostgreSQL server.
            # We use the machine FQDN and the barman_user
            # setting to call the barman-wal-restore correctly.
            # If local recovery, we use barman directly, assuming
            # the postgres process will be executed with the barman user.
            # It MUST to be reviewed by the user in any case.
            if remote_command:
                fqdn = socket.getfqdn()
                print(
                    "# The 'barman-wal-restore' command "
                    "is provided in the 'barman-cli' package",
                    file=recovery)
                print("restore_command = 'barman-wal-restore -U %s "
                      "%s %s %%f %%p'" %
                      (self.config.config.user, fqdn, self.config.name),
                      file=recovery)
            else:
                print("# The 'barman get-wal' command "
                      "must run as '%s' user" % self.config.config.user,
                      file=recovery)
                print("restore_command = 'sudo -u %s "
                      "barman get-wal %s %%f > %%p'" %
                      (self.config.config.user, self.config.name),
                      file=recovery)
            recovery_info['results']['get_wal'] = True
        else:
            print("restore_command = 'cp barman_xlog/%f %p'", file=recovery)
        if backup_info.version >= 80400 and \
                not recovery_info['get_wal']:
            print("recovery_end_command = 'rm -fr barman_xlog'", file=recovery)
        if target_time:
            print("recovery_target_time = '%s'" % target_time, file=recovery)
        if target_tli:
            print("recovery_target_timeline = %s" % target_tli, file=recovery)
        if target_xid:
            print("recovery_target_xid = '%s'" % target_xid, file=recovery)
        if target_name:
            print("recovery_target_name = '%s'" % target_name, file=recovery)
        if (target_xid or target_time) and exclusive:
            print("recovery_target_inclusive = '%s'" % (not exclusive),
                  file=recovery)
        recovery.close()
        if remote_command:
            plain_rsync = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=self.config.bandwidth_limit,
                network_compression=self.config.network_compression)
            try:
                plain_rsync.from_file_list(['recovery.conf'],
                                           recovery_info['tempdir'],
                                           ':%s' % dest)
            except CommandFailedException as e:
                output.error('remote copy of recovery.conf failed: %s', e)
                output.close_and_exit()
Example #48
0
    def recover(self, backup_info, dest, tablespaces, target_tli, target_time,
                target_xid, target_name, exclusive, remote_command):
        """
        Performs a recovery of a backup

        The phases are executed in this order: setup, PITR targets
        selection, safe horizon retrieval, destination (and tablespaces)
        preparation, base backup copy, copy of the backup metadata, WAL
        segments copy (skipped when the get-wal option is set),
        recovery.conf generation (PITR only), archive_status directory
        creation, analysis of the configuration files and teardown.

        Every failure handled here is reported through output.error()
        followed by output.close_and_exit().

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        :return: the recovery_info map created by _setup(), as left by
            the phases described above
        """

        # Prepare a map that contains all the objects required for a recovery
        # (keys read below: 'recovery_dest', 'cmd', 'rsync', 'safe_horizon',
        # 'get_wal', 'target_epoch', 'wal_dest' and 'is_pitr')
        recovery_info = self._setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)

        # Set targets for PITR
        self._set_pitr_targets(recovery_info, backup_info, dest, target_name,
                               target_time, target_tli, target_xid)

        # Retrieve the safe_horizon for smart copy
        self._retrieve_safe_horizon(recovery_info, backup_info, dest)

        # Check the destination directory. If it doesn't exist, create it
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.error(
                "unable to initialise destination directory "
                "'%s': %s", dest, e)
            output.close_and_exit()

        # Initialize tablespace directories (only when the backup has any)
        if backup_info.tablespaces:
            self._prepare_tablespaces(backup_info, recovery_info['cmd'], dest,
                                      tablespaces)
        # Copy the base backup
        output.info("Copying the base backup.")
        try:
            self._backup_copy(backup_info, dest, tablespaces, remote_command,
                              recovery_info['safe_horizon'])
        except DataTransferFailure as e:
            output.error("Failure copying base backup: %s", e)
            output.close_and_exit()

        # Copy the backup.info file in the destination as
        # ".barman-recover.info": through rsync for a remote recovery,
        # by saving the BackupInfo directly for a local one
        if remote_command:
            try:
                recovery_info['rsync'](backup_info.filename,
                                       ':%s/.barman-recover.info' % dest)
            except CommandFailedException as e:
                output.error('copy of recovery metadata file failed: %s', e)
                output.close_and_exit()
        else:
            backup_info.save(os.path.join(dest, '.barman-recover.info'))

        # Restore the WAL segments. If GET_WAL option is set, skip this phase
        # as they will be retrieved using the get-wal command.
        if not recovery_info['get_wal']:
            output.info("Copying required WAL segments.")

            try:
                # Retrieve a list of required log files
                # (materialised as a tuple because it is used again
                # below to generate the archive status files)
                required_xlog_files = tuple(
                    self.server.get_required_xlog_files(
                        backup_info, target_tli,
                        recovery_info['target_epoch']))

                # Restore WAL segments into the wal_dest directory
                self._xlog_copy(required_xlog_files, recovery_info['wal_dest'],
                                remote_command)
            except DataTransferFailure as e:
                output.error("Failure copying WAL files: %s", e)
                output.close_and_exit()
            except BadXlogSegmentName as e:
                output.error(
                    "invalid xlog segment name %r\n"
                    "HINT: Please run \"barman rebuild-xlogdb %s\" "
                    "to solve this issue", str(e), self.config.name)
                output.close_and_exit()
            # If WAL files are put directly in the pg_xlog directory,
            # avoid shipping of just recovered files
            # by creating the corresponding archive status file
            # NOTE(review): required_xlog_files is only bound when the try
            # block above succeeded; this relies on close_and_exit() not
            # returning after a failure - confirm
            if not recovery_info['is_pitr']:
                output.info("Generating archive status files")
                self._generate_archive_status(recovery_info, remote_command,
                                              required_xlog_files)

        # Generate recovery.conf file (only if needed by PITR)
        if recovery_info['is_pitr']:
            output.info("Generating recovery.conf")
            self._generate_recovery_conf(recovery_info, backup_info, dest,
                                         exclusive, remote_command,
                                         target_name, target_time, target_tli,
                                         target_xid)

        # Create archive_status directory if necessary
        archive_status_dir = os.path.join(recovery_info['wal_dest'],
                                          'archive_status')
        try:
            recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir)
        except FsOperationFailed as e:
            output.error(
                "unable to create the archive_status directory "
                "'%s': %s", archive_status_dir, e)
            output.close_and_exit()

        # As last step, analyse configuration files in order to spot
        # harmful options. Barman performs automatic conversion of
        # some options as well as notifying users of their existence.
        #
        # This operation is performed in three steps:
        # 1) mapping
        # 2) analysis
        # 3) copy
        output.info("Identify dangerous settings in destination directory.")

        self._map_temporary_config_files(recovery_info, backup_info,
                                         remote_command)
        self._analyse_temporary_config_files(recovery_info)
        self._copy_temporary_config_files(dest, remote_command, recovery_info)

        # Cleanup operations
        self._teardown(recovery_info)

        return recovery_info
Example #49
0
def get_server_list(args=None,
                    skip_inactive=False,
                    skip_disabled=False,
                    on_error_stop=True,
                    suppress_error=False):
    """
    Build the map of the requested servers from the configuration

    When args is None, or args.server_name contains the special name
    'all', every server defined in the configuration is considered.
    A requested name that is not found in the configuration is mapped
    to None.

    :param args: an argparse namespace containing a list server_name parameter
    :param bool skip_inactive: leave inactive servers out of the result
    :param bool skip_disabled: leave disabled servers out of the result
    :param bool on_error_stop: stop if a global configuration error is found
    :param bool suppress_error: suppress display of errors (e.g. diagnose)
    :rtype: dict(str,barman.server.Server|None)
    """
    # Only meaningful in a multiple-server context: when a namespace is
    # provided, its server_name attribute has to be a list
    assert not args or isinstance(args.server_name, list)

    # Names of all the configured servers (used for the 'all' case)
    known_names = barman.__config__.server_names()

    # Configuration errors collected from all the servers
    config_errors = barman.__config__.servers_msg_list

    # Global errors take precedence over any other processing
    if config_errors:
        if not suppress_error:
            for message in config_errors:
                output.error(message)

        if on_error_stop:
            output.close_and_exit()
            # Never reached, kept to make the control flow explicit
            return {}

    # The whole server list is selected when no namespace is given
    # or when the special 'all' name has been used
    everything = not args or 'all' in args.server_name
    if everything:
        # 'all' is only valid as the sole server name
        if args and len(args.server_name) != 1:
            output.error("You cannot use 'all' with other server names")
        requested = known_names
    else:
        requested = args.server_name

    # Resolve every requested name, applying the skip rules
    result = {}
    for server_name in requested:
        conf = barman.__config__.get_server(server_name)
        if conf is None:
            # Unknown server: keep track of it with a None value
            result[server_name] = None
            continue

        candidate = Server(conf)
        if skip_inactive and not candidate.config.active:
            # Inactive server, not wanted by the caller
            output.info("Skipping inactive server '%s'" % conf.name)
            continue
        if skip_disabled and candidate.config.disabled:
            # Disabled server, not wanted by the caller
            output.info("Skipping temporarily disabled server '%s'" %
                        conf.name)
            continue
        result[server_name] = candidate

    return result
Example #50
0
def get_server(args,
               skip_inactive=True,
               skip_disabled=False,
               inactive_is_error=False,
               on_error_stop=True,
               suppress_error=False):
    """
    Retrieve a single server by name (thin wrapper over get_server_list())

    The result is a Server object, or None when the requested server is
    unknown and on_error_stop is False.

    WARNING: args.server_name is replaced by a one-element list

    :param args: an argparse namespace containing a single
        server_name parameter
        WARNING: the function modifies the content of this parameter
    :param bool skip_inactive: do nothing if the server is inactive
    :param bool skip_disabled: do nothing if the server is disabled
    :param bool inactive_is_error: treat inactive server as error
    :param bool on_error_stop: stop if an error is found
    :param bool suppress_error: suppress display of errors (e.g. diagnose)
    :rtype: barman.server.Server|None
    """
    # Single-server context only: the name must be a plain string
    name = args.server_name
    assert isinstance(name, str)

    # 'all' has no meaning when exactly one server is expected
    if name == 'all':
        output.error("You cannot use 'all' in a single server context")
        output.close_and_exit()
        # Never reached, kept to make the control flow explicit
        return None

    # get_server_list() wants a list of names
    args.server_name = [name]

    # An inactive server has to be retrieved anyway when it must be
    # reported as an error, so the skip is disabled in that case
    skip_inactive = skip_inactive & (not inactive_is_error)

    found = get_server_list(args, skip_inactive, skip_disabled,
                            on_error_stop, suppress_error)

    # An empty result means the server has been filtered out
    if not found:
        output.close_and_exit()
        # Never reached, kept to make the control flow explicit
        return None

    server = found[name]

    # Run the standard validation, which also emits the standard
    # messages for inactive or disabled servers
    usable = manage_server_command(server, name, inactive_is_error)
    if on_error_stop and not usable:
        output.close_and_exit()
        # Never reached, kept to make the control flow explicit
        return None

    # The server passed (or was allowed to bypass) the validation
    return server
Example #51
0
    def backup(self):
        """
        Performs a backup for the server

        The backup is executed through self.executor, surrounded by the
        pre and post hook scripts (both plain and retry flavours).
        Any exception raised while taking the backup, including
        KeyboardInterrupt, is reported through output.error() and, when
        the backup information already exists, recorded in it as a FAILED
        status with an error message. The backup information (possibly
        None when its creation failed) is finally sent to
        output.result().
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = BackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info("Starting backup using %s method for server %s in %s",
                        self.mode, self.config.name,
                        backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                                 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as DONE
            backup_info.set_attribute("status", "DONE")
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g.: CTRL-C)
        except BaseException as e:
            msg_lines = str(e).strip().splitlines()
            # If the exception has no attached message use the raw
            # type name. This has to be done even when backup_info has
            # not been created yet, because the first line is always
            # needed by the error message emitted below.
            if not msg_lines:
                msg_lines = [type(e).__name__]
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", "FAILED")
                backup_info.set_attribute(
                    "error", "failure %s (%s)" %
                    (self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s\n%s",
                         self.executor.current_action, msg_lines[0],
                         '\n'.join(msg_lines[1:]))

        else:
            output.info("Backup end at LSN: %s (%s, %08X)",
                        backup_info.end_xlog, backup_info.end_wal,
                        backup_info.end_offset)
            output.info(
                "Backup completed (start time: %s, elapsed time: %s)",
                self.executor.copy_start_time,
                human_readable_timedelta(self.executor.copy_end_time -
                                         self.executor.copy_start_time))
            # Create a restore point after a backup
            target_name = 'barman_%s' % backup_info.backup_id
            self.server.postgres.create_restore_point(target_name)
        finally:
            # Nothing to persist, and no post hook to run, when the
            # backup information could not even be created
            if backup_info:
                backup_info.save()

                # Make sure we are not holding any PostgreSQL connection
                # during the post-backup scripts
                self.server.close()

                # Run the post-backup-retry-script if present.
                try:
                    retry_script = RetryHookScriptRunner(
                        self, 'backup_retry_script', 'post')
                    retry_script.env_from_backup_info(backup_info)
                    retry_script.run()
                except AbortedRetryHookScript as e:
                    # Ignore the ABORT_STOP as it is a post-hook operation
                    _logger.warning(
                        "Ignoring stop request after receiving "
                        "abort (exit code %d) from post-backup "
                        "retry hook script: %s", e.hook.exit_status,
                        e.hook.script)

                # Run the post-backup-script if present.
                script = HookScriptRunner(self, 'backup_script', 'post')
                script.env_from_backup_info(backup_info)
                script.run()

        output.result('backup', backup_info)
Example #52
0
def recover(args):
    """
    Recover a server at a given time or xid or barrier id

    Validates the command line arguments (backup status, tablespace
    relocation rules, destination directory, recovery targets and
    network compression), applies the requested overrides to the server
    configuration and then runs the recovery on the server.

    :param args: the argparse namespace of the recover command
    """
    server = get_server(args)

    # Retrieves the backup. Despite its name, backup_id holds a backup
    # information object: its status and tablespaces are read below
    backup_id = parse_backup_id(server, args)
    if backup_id.status != BackupInfo.DONE:
        output.error(
            "Cannot recover from backup '%s' of server '%s': "
            "backup status is not DONE",
            args.backup_id, server.config.name)
        output.close_and_exit()

    # decode the tablespace relocation rules
    tablespaces = {}
    if args.tablespace:
        for rule in args.tablespace:
            try:
                # A rule without ':' yields a one-element sequence, which
                # dict.update() rejects with a ValueError
                tablespaces.update([rule.split(':', 1)])
            except ValueError:
                output.error(
                    "Invalid tablespace relocation rule '%s'\n"
                    "HINT: The valid syntax for a relocation rule is "
                    "NAME:LOCATION", rule)
                output.close_and_exit()

    # validate the rules against the tablespace list
    valid_tablespaces = []
    if backup_id.tablespaces:
        valid_tablespaces = [tablespace_data.name for tablespace_data in
                             backup_id.tablespaces]
    for item in tablespaces:
        if item not in valid_tablespaces:
            output.error("Invalid tablespace name '%s'\n"
                         "HINT: Please use any of the following "
                         "tablespaces: %s",
                         item, ', '.join(valid_tablespaces))
            output.close_and_exit()

    # explicitly disallow the rsync remote syntax (common mistake)
    if ':' in args.destination_directory:
        output.error(
            "The destination directory parameter "
            "cannot contain the ':' character\n"
            "HINT: If you want to do a remote recovery you have to use "
            "the --remote-ssh-command option")
        output.close_and_exit()

    # Command line overrides of the server configuration
    if args.retry_sleep is not None:
        server.config.basebackup_retry_sleep = args.retry_sleep
    if args.retry_times is not None:
        server.config.basebackup_retry_times = args.retry_times
    if hasattr(args, 'get_wal'):
        if args.get_wal:
            server.config.recovery_options.add(RecoveryOptions.GET_WAL)
        else:
            # discard() instead of remove(): the option may be absent
            # from the configuration, and disabling it again must not
            # fail with a KeyError
            server.config.recovery_options.discard(RecoveryOptions.GET_WAL)
    if args.jobs is not None:
        server.config.parallel_jobs = args.jobs

    # PostgreSQL supports multiple parameters to specify when the recovery
    # process will end, and in that case the last entry in recovery.conf
    # will be used. See [1]
    #
    # Since the meaning of the target options is not dependent on the order
    # of parameters, we decided to make the target options mutually exclusive.
    #
    # [1]: https://www.postgresql.org/docs/current/static/
    #   recovery-target-settings.html
    #  add target_barrier for adb by danghb @171018
    target_options = ['target_tli', 'target_time', 'target_xid',
                      'target_name', 'target_barrier', 'target_immediate']
    specified_target_options = len(
        [option for option in target_options if getattr(args, option)])
    if specified_target_options > 1:
        output.error(
            "You cannot specify multiple targets for the recovery operation")
        output.close_and_exit()

    if hasattr(args, 'network_compression'):
        # Network compression requires a remote recovery
        if args.network_compression and args.remote_ssh_command is None:
            output.error(
                "Network compression can only be used with "
                "remote recovery.\n"
                "HINT: If you want to do a remote recovery "
                "you have to use the --remote-ssh-command option")
            output.close_and_exit()
        server.config.network_compression = args.network_compression

    # closing() guarantees server.close() is called once the recovery
    # is over, whatever its outcome
    with closing(server):
        server.recover(backup_id,
                       args.destination_directory,
                       tablespaces=tablespaces,
                       target_tli=args.target_tli,
                       target_time=args.target_time,
                       target_xid=args.target_xid,
                       target_barrier=args.target_barrier,
                       target_name=args.target_name,
                       target_immediate=args.target_immediate,
                       exclusive=args.exclusive,
                       remote_command=args.remote_ssh_command)

    output.close_and_exit()
Example #53
0
    def backup(self, wait=False, wait_timeout=None):
        """
        Performs a backup for the server

        The backup is executed through self.executor, surrounded by the
        pre and post hook scripts (both plain and retry flavours). On
        success the backup is left in the WAITING_FOR_WALS status; any
        exception raised while taking it, including KeyboardInterrupt, is
        reported through output.error() and, when the backup information
        already exists, recorded in it as a FAILED status.

        :param bool wait: wait for all the required WAL files to be archived
        :param int|None wait_timeout: timeout forwarded to
            server.wait_for_wal() when wait is set
            (presumably expressed in seconds - TODO confirm)
        :return BackupInfo: the generated BackupInfo, or None if the
            failure happened before its creation
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = LocalBackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.set_attribute('systemid', self.server.systemid)
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info("Starting backup using %s method for server %s in %s",
                        self.mode, self.config.name,
                        backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                                 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Create a restore point after a backup
            target_name = 'barman_%s' % backup_info.backup_id
            self.server.postgres.create_restore_point(target_name)

            # Free the Postgres connection
            self.server.postgres.close()

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as WAITING_FOR_WALS
            backup_info.set_attribute("status", BackupInfo.WAITING_FOR_WALS)
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g.: CTRL-C)
        except BaseException as e:
            msg_lines = force_str(e).strip().splitlines()
            # If the exception has no attached message use the raw
            # type name
            # (done before checking backup_info, as msg_lines is used by
            # the error message below in both cases)
            if len(msg_lines) == 0:
                msg_lines = [type(e).__name__]
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", BackupInfo.FAILED)
                backup_info.set_attribute(
                    "error", "failure %s (%s)" %
                    (self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s",
                         self.executor.current_action, '\n'.join(msg_lines))

        else:
            # Success path: report the end position and the elapsed time
            output.info("Backup end at LSN: %s (%s, %08X)",
                        backup_info.end_xlog, backup_info.end_wal,
                        backup_info.end_offset)

            executor = self.executor
            # The elapsed time is measured from the start of the copy
            # up to now
            output.info(
                "Backup completed (start time: %s, elapsed time: %s)",
                self.executor.copy_start_time,
                human_readable_timedelta(datetime.datetime.now() -
                                         executor.copy_start_time))

            # If requested, wait for end_wal to be archived
            if wait:
                try:
                    self.server.wait_for_wal(backup_info.end_wal, wait_timeout)
                    self.check_backup(backup_info)
                except KeyboardInterrupt:
                    # Ignore CTRL-C pressed while waiting for WAL files
                    output.info(
                        "Got CTRL-C. Continuing without waiting for '%s' "
                        "to be archived", backup_info.end_wal)

        finally:
            # Nothing to persist, and no post hook to run, when the
            # backup information could not even be created
            if backup_info:
                backup_info.save()

                # Make sure we are not holding any PostgreSQL connection
                # during the post-backup scripts
                self.server.close()

                # Run the post-backup-retry-script if present.
                try:
                    retry_script = RetryHookScriptRunner(
                        self, 'backup_retry_script', 'post')
                    retry_script.env_from_backup_info(backup_info)
                    retry_script.run()
                except AbortedRetryHookScript as e:
                    # Ignore the ABORT_STOP as it is a post-hook operation
                    _logger.warning(
                        "Ignoring stop request after receiving "
                        "abort (exit code %d) from post-backup "
                        "retry hook script: %s", e.hook.exit_status,
                        e.hook.script)

                # Run the post-backup-script if present.
                script = HookScriptRunner(self, 'backup_script', 'post')
                script.env_from_backup_info(backup_info)
                script.run()

        output.result('backup', backup_info)
        return backup_info
Example #54
0
    def delete_backup(self, backup):
        """
        Delete a backup

        The deletion is refused when it would break the minimum
        redundancy configured for the server. Otherwise the backup data
        is removed, followed by the WAL files that are no longer needed
        (only when the oldest backup is being deleted) and by the backup
        directory itself. Pre and post delete hook scripts are run around
        the operation.

        :param backup: the backup to delete
        :return bool: True if deleted, False if could not delete the backup
        """
        done_backups = self.get_available_backups(
            status_filter=(BackupInfo.DONE, ))
        min_redundancy = self.server.config.minimum_redundancy
        # A DONE backup can only go away if enough DONE backups remain
        if backup.status == BackupInfo.DONE:
            redundancy = len(done_backups)
            if min_redundancy >= redundancy:
                output.warning(
                    "Skipping delete of backup %s for server %s "
                    "due to minimum redundancy requirements "
                    "(minimum redundancy = %s, "
                    "current redundancy = %s)", backup.backup_id,
                    self.config.name, min_redundancy, redundancy)
                return False

        # The elapsed time reported at the end is measured from here
        started_at = datetime.datetime.now()

        # Pre delete hook script, if any
        pre_hook = HookScriptRunner(self, 'delete_script', 'pre')
        pre_hook.env_from_backup_info(backup)
        pre_hook.run()

        # Pre delete retry hook script, if any
        pre_retry_hook = RetryHookScriptRunner(self, 'delete_retry_script',
                                               'pre')
        pre_retry_hook.env_from_backup_info(backup)
        pre_retry_hook.run()

        output.info("Deleting backup %s for server %s", backup.backup_id,
                    self.config.name)
        # The neighbours must be looked up before the data is removed
        older_backup = self.get_previous_backup(backup.backup_id)
        newer_backup = self.get_next_backup(backup.backup_id)

        # First remove the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as exc:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, exc)
            return False

        # WAL files are only cleaned up when the first available backup
        # is the one being deleted
        if not older_backup:
            # With a following backup, WAL files are removed up to it.
            # Without one, a concurrent backup setup removes only the WAL
            # files prior to the start of the deleted backup, as they
            # might be useful to a concurrent backup started right after;
            # otherwise (exclusive backup, the default) all the WAL files
            # are removed, which is what None stands for.
            if newer_backup:
                wal_limit = newer_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                wal_limit = backup
            else:
                wal_limit = None

            protected_timelines = set()
            # Without a limit there is no backup left to protect
            if wal_limit:
                # Every other timeline holding at least one backup is
                # protected: no WAL file is deleted from it. The backup
                # being deleted does not count.
                archiving = self.get_available_backups(
                    BackupInfo.STATUS_ARCHIVING)
                protected_timelines = set(
                    other.timeline for other in archiving.values()
                    if not other == backup)
                # The timeline of the limit itself can be cleaned safely,
                # as the limit tells which of its WAL files are unused
                protected_timelines.discard(wal_limit.timeline)

            output.info("Delete associated WAL segments:")
            removed_wals = self.remove_wal_before_backup(wal_limit,
                                                         protected_timelines)
            for wal_name in removed_wals:
                output.info("\t%s", wal_name)

        # Removing the backup directory completes the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as exc:
            output.error(
                "Failure deleting backup %s for server %s.\n%s\n"
                "Please manually remove the '%s' directory", backup.backup_id,
                self.config.name, exc, backup.get_basebackup_directory())
            return False
        self.backup_cache_remove(backup)

        # Report how long the whole removal took
        finished_at = datetime.datetime.now()
        output.info(
            "Deleted backup %s (start time: %s, elapsed time: %s)",
            backup.backup_id, started_at.ctime(),
            human_readable_timedelta(finished_at - started_at))

        # Drop the sync lockfile of the backup, when present
        lock = ServerBackupSyncLock(self.config.barman_lock_directory,
                                    self.config.name, backup.backup_id)
        if os.path.exists(lock.filename):
            _logger.debug("Deleting backup sync lockfile: %s" %
                          lock.filename)

            os.unlink(lock.filename)

        # Post delete retry hook script, if any
        try:
            post_retry_hook = RetryHookScriptRunner(
                self, 'delete_retry_script', 'post')
            post_retry_hook.env_from_backup_info(backup)
            post_retry_hook.run()
        except AbortedRetryHookScript as exc:
            # An abort request cannot stop anything in a post hook
            _logger.warning(
                "Ignoring stop request after receiving "
                "abort (exit code %d) from post-delete "
                "retry hook script: %s", exc.hook.exit_status,
                exc.hook.script)

        # Post delete hook script, if any
        post_hook = HookScriptRunner(self, 'delete_script', 'post')
        post_hook.env_from_backup_info(backup)
        post_hook.run()

        return True
Example #55
0
def main():
    """
    Entry point of the Barman command line interface

    Build the argument parser with the global options and the whole set
    of sub-commands, dispatch the requested one and finally leave through
    the output API.
    """
    parser = ArghParser(
        epilog="Barman by EnterpriseDB (www.enterprisedb.com)")

    # Global options
    version_banner = (
        "%s\n\nBarman by EnterpriseDB (www.enterprisedb.com)"
        % barman.__version__)
    parser.add_argument(
        "-v", "--version", action="version", version=version_banner)

    config_help = "uses a configuration file (defaults: %s)" % (
        ", ".join(barman.config.Config.CONFIG_FILES))
    parser.add_argument("-c", "--config", help=config_help, default=SUPPRESS)

    parser.add_argument(
        "--color", "--colour",
        help="Whether to use colors in the output",
        choices=["never", "always", "auto"],
        default="auto")
    parser.add_argument(
        "--log-level",
        help="Override the default log level",
        choices=list(get_log_levels()),
        default=SUPPRESS)
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="be quiet")
    parser.add_argument(
        "-d", "--debug", action="store_true", help="debug output")
    parser.add_argument(
        "-f", "--format",
        help="output format",
        choices=output.AVAILABLE_WRITERS.keys(),
        default=output.DEFAULT_WRITER)

    # Sub-commands made available on the command line
    subcommands = [
        archive_wal, backup, check, check_backup, cron, delete,
        diagnose, get_wal, list_backup, list_files, list_server,
        put_wal, rebuild_xlogdb, receive_wal, recover, show_backup,
        show_server, replication_status, status, switch_wal,
        switch_xlog, sync_info, sync_backup, sync_wals,
    ]
    parser.add_commands(subcommands)

    # Any failure is reported through the output API instead of being
    # propagated to the caller
    # noinspection PyBroadException
    try:
        parser.dispatch(pre_call=global_config)
    except KeyboardInterrupt:
        output.error("Process interrupted by user (KeyboardInterrupt)")
    except Exception as e:
        output.exception("%s\nSee log file for more details." % e)

    # cleanup output API and exit honoring output.error_occurred and
    # output.error_exit_code
    output.close_and_exit()
Example #56
0
def recover(args):
    """
    Recover a server at a given time or xid

    The following attributes of ``args`` are read here: ``backup_id``,
    ``tablespace``, ``destination_directory``, ``retry_sleep``,
    ``retry_times``, ``target_tli``, ``target_time``, ``target_xid``,
    ``target_name``, ``exclusive`` and ``remote_ssh_command``.

    :param args: the parsed command line arguments
    """
    server = get_server(args)

    # Fetch the requested backup: only a DONE backup can be recovered
    backup_info = parse_backup_id(server, args)
    if backup_info.status != BackupInfo.DONE:
        output.error(
            "Cannot recover from backup '%s' of server '%s': "
            "backup status is not DONE", args.backup_id, server.config.name)
        output.close_and_exit()

    # Turn every NAME:LOCATION relocation rule into a dictionary entry
    tablespaces = {}
    for rule in args.tablespace or ():
        name, separator, location = rule.partition(':')
        if separator:
            tablespaces[name] = location
        else:
            # A rule without the ':' separator cannot be decoded
            output.error(
                "Invalid tablespace relocation rule '%s'\n"
                "HINT: The valid syntax for a relocation rule is "
                "NAME:LOCATION", rule)
            output.close_and_exit()

    # Every relocated tablespace must exist in the backup
    valid_tablespaces = [
        tablespace_data.name
        for tablespace_data in backup_info.tablespaces or []
    ]
    for item in tablespaces:
        if item in valid_tablespaces:
            continue
        output.error(
            "Invalid tablespace name '%s'\n"
            "HINT: Please use any of the following "
            "tablespaces: %s", item, ', '.join(valid_tablespaces))
        output.close_and_exit()

    # explicitly disallow the rsync remote syntax (common mistake)
    if ':' in args.destination_directory:
        output.error(
            "The destination directory parameter "
            "cannot contain the ':' character\n"
            "HINT: If you want to do a remote recovery you have to use "
            "the --remote-ssh-command option")
        output.close_and_exit()

    # Command line values take precedence over the configured retry policy
    if args.retry_sleep is not None:
        server.config.basebackup_retry_sleep = args.retry_sleep
    if args.retry_times is not None:
        server.config.basebackup_retry_times = args.retry_times

    recovery_options = dict(
        tablespaces=tablespaces,
        target_tli=args.target_tli,
        target_time=args.target_time,
        target_xid=args.target_xid,
        target_name=args.target_name,
        exclusive=args.exclusive,
        remote_command=args.remote_ssh_command,
    )
    with closing(server):
        server.recover(backup_info, args.destination_directory,
                       **recovery_options)

    output.close_and_exit()
Example #57
0
    def _setup(self, backup_info, remote_command, dest):
        """
        Build the recovery_info dictionary driving the recovery and create
        the temporary working directory

        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str remote_command: ssh command for remote connection
        :param str dest: destination directory of the recovery
        :return dict: recovery_info dictionary, holding the basic values for a
            recovery
        """
        # The WAL directory is 'pg_xlog' for versions lower than 100000
        # and 'pg_wal' for all the others
        wal_dir_name = 'pg_xlog' if backup_info.version < 100000 else 'pg_wal'

        # List of the configuration files to manage
        configuration_files = ['postgresql.conf']
        if backup_info.version >= 90400:
            configuration_files.append('postgresql.auto.conf')

        recovery_info = {
            'cmd': None,
            'recovery_dest': 'local',
            'rsync': None,
            'configuration_files': configuration_files,
            'destination_path': dest,
            'temporary_configuration_files': [],
            'tempdir': tempfile.mkdtemp(prefix='barman_recovery-'),
            'is_pitr': False,
            'wal_dest': os.path.join(dest, wal_dir_name),
            'get_wal': RecoveryOptions.GET_WAL in self.config.recovery_options,
            # Outcome of the recovery, collected for the output generation
            'results': {
                'changes': [],
                'warnings': [],
                'delete_barman_xlog': False,
                'missing_files': [],
                'get_wal': False,
            },
        }

        if not remote_command:
            # Local recovery: commands are executed on this host
            recovery_info['cmd'] = UnixLocalCommand()
            return recovery_info

        # Remote recovery: both rsync and the command executor go through
        # the provided ssh command
        recovery_info['recovery_dest'] = 'remote'
        try:
            recovery_info['rsync'] = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=self.config.bandwidth_limit,
                network_compression=self.config.network_compression)
        except CommandFailedException:
            # Clean up before propagating the failure
            self._teardown(recovery_info)
            raise

        try:
            recovery_info['cmd'] = UnixRemoteCommand(remote_command,
                                                     path=self.server.path)
        except FsOperationFailed:
            self._teardown(recovery_info)
            output.error(
                "Unable to connect to the target host using the command "
                "'%s'", remote_command)
            output.close_and_exit()

        return recovery_info
Example #58
0
    def delete_backup(self, backup):
        """
        Delete a backup

        The deletion is skipped (with a warning) when the backup is DONE and
        removing it would violate the configured minimum redundancy.
        Otherwise the backup data is removed first, then - only if this is
        the first available backup - the WAL files that are no longer
        needed, and finally the backup directory itself.

        Failures (OSError) while removing files are reported through the
        output API and stop the operation; they are not raised.

        :param backup: the backup to delete
        """
        available_backups = self.get_available_backups()
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            # The arguments must follow the order of the placeholders:
            # minimum redundancy first, current redundancy last
            output.warning(
                "Skipping delete of backup %s for server %s "
                "due to minimum redundancy requirements "
                "(minimum redundancy = %s, "
                "current redundancy = %s)", backup.backup_id, self.config.name,
                minimum_redundancy, len(available_backups))
            return
        # Keep track of when the delete operation started.
        delete_start_time = datetime.datetime.now()
        output.info("Deleting backup %s for server %s", backup.backup_id,
                    self.config.name)
        # The neighbours must be looked up before any data is removed
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated to the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup

            timelines_to_protect = set()
            # If remove_until is not set there are no backup left
            if remove_until:
                # Retrieve the list of extra timelines that contains at least
                # a backup. On such timelines we don't want to delete any WAL
                for value in self.get_available_backups(
                        BackupInfo.STATUS_ARCHIVING).values():
                    # Ignore the backup that is being deleted
                    if value == backup:
                        continue
                    timelines_to_protect.add(value.timeline)
                # Remove the timeline of `remove_until` from the list.
                # We have enough information to safely delete unused WAL files
                # on it.
                timelines_to_protect.discard(remove_until.timeline)

            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until,
                                                      timelines_to_protect):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error(
                "Failure deleting backup %s for server %s.\n%s\n"
                "Please manually remove the '%s' directory", backup.backup_id,
                self.config.name, e, backup.get_basebackup_directory())
            return
        self.backup_cache_remove(backup)
        # Save the time of the complete removal of the backup
        delete_end_time = datetime.datetime.now()
        output.info(
            "Deleted backup %s (start time: %s, elapsed time: %s)",
            backup.backup_id, delete_start_time.ctime(),
            human_readable_timedelta(delete_end_time - delete_start_time))
Example #59
0
    def _set_pitr_targets(self, recovery_info, backup_info, dest, target_name,
                          target_time, target_tli, target_xid):
        """
        Set PITR targets - as specified by the user

        When at least one target is given (or the 'get_wal' recovery option
        is active) the recovery is flagged as PITR and the WAL destination
        is moved to the 'barman_xlog' directory inside ``dest``.
        In every case the 'target_epoch' and 'target_datetime' keys of
        ``recovery_info`` are set (to None when no target time is given).

        If the target time cannot be parsed, the error is reported, the
        recovery is torn down and the program exits through the output API.

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str dest: destination directory of the recovery
        :param str|None target_name: recovery target name for PITR
        :param str|None target_time: recovery target time for PITR
        :param str|None target_tli: recovery target timeline for PITR
        :param str|None target_xid: recovery target transaction id for PITR
        """
        target_epoch = None
        target_datetime = None
        if (target_time or target_xid
                or (target_tli and target_tli != backup_info.timeline)
                or target_name or recovery_info['get_wal']):
            recovery_info['is_pitr'] = True
            # Only used to build the informational message below
            targets = {}
            if target_time:
                # noinspection PyBroadException
                try:
                    target_datetime = dateutil.parser.parse(target_time)
                except ValueError as e:
                    output.error(
                        "unable to parse the target time parameter %r: %s",
                        target_time, e)
                    self._teardown(recovery_info)
                    output.close_and_exit()
                except Exception:
                    # this should not happen, but there is a known bug in
                    # dateutil.parser.parse() implementation
                    # ref: https://bugs.launchpad.net/dateutil/+bug/1247643
                    output.error(
                        "unable to parse the target time parameter %r",
                        target_time)
                    # Clean up exactly as in the ValueError branch, so the
                    # recovery resources are released before exiting
                    self._teardown(recovery_info)
                    output.close_and_exit()

                # Seconds since the epoch, including the fractional part.
                # NOTE(review): time.mktime() interprets the tuple as local
                # time, so an explicit UTC offset in target_time does not
                # appear to be taken into account here - confirm
                target_epoch = (time.mktime(target_datetime.timetuple()) +
                                (target_datetime.microsecond / 1000000.))
                targets['time'] = str(target_datetime)
            if target_xid:
                targets['xid'] = str(target_xid)
            if target_tli and target_tli != backup_info.timeline:
                targets['timeline'] = str(target_tli)
            if target_name:
                targets['name'] = str(target_name)
            output.info(
                "Doing PITR. Recovery target %s",
                (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])))
            recovery_info['wal_dest'] = os.path.join(dest, 'barman_xlog')

            # With a PostgreSQL version older than 8.4, it is the user's
            # responsibility to delete the "barman_xlog" directory as the
            # restore_command option in recovery.conf is not supported
            if backup_info.version < 80400 and \
                    not recovery_info['get_wal']:
                recovery_info['results']['delete_barman_xlog'] = True
        recovery_info['target_epoch'] = target_epoch
        recovery_info['target_datetime'] = target_datetime
Example #60
0
def main():
    """
    Entry point of the Barman command line interface

    Build the argument parser with the global options and the whole set
    of sub-commands, dispatch the requested one and finally leave through
    the output API.
    """
    parser = ArghParser(epilog='Barman by 2ndQuadrant (www.2ndQuadrant.com)')

    # Global options
    version_banner = (
        '%s\n\nBarman by 2ndQuadrant (www.2ndQuadrant.com)'
        % barman.__version__)
    parser.add_argument(
        '-v', '--version', action='version', version=version_banner)

    config_help = 'uses a configuration file (defaults: %s)' % (
        ', '.join(barman.config.Config.CONFIG_FILES))
    parser.add_argument('-c', '--config', help=config_help, default=SUPPRESS)

    parser.add_argument(
        '--color', '--colour',
        help='Whether to use colors in the output',
        choices=['never', 'always', 'auto'],
        default='auto')
    parser.add_argument(
        '-q', '--quiet', action='store_true', help='be quiet')
    parser.add_argument(
        '-d', '--debug', action='store_true', help='debug output')
    parser.add_argument(
        '-f', '--format',
        help='output format',
        choices=output.AVAILABLE_WRITERS.keys(),
        default=output.DEFAULT_WRITER)

    # Sub-commands made available on the command line
    subcommands = [
        archive_wal, backup, check, check_backup, cron, delete,
        diagnose, get_wal, list_backup, list_files, list_server,
        put_wal, rebuild_xlogdb, receive_wal, recover, show_backup,
        show_server, replication_status, status, switch_wal,
        switch_xlog, sync_info, sync_backup, sync_wals,
    ]
    parser.add_commands(subcommands)

    # Any failure is reported through the output API instead of being
    # propagated to the caller
    # noinspection PyBroadException
    try:
        parser.dispatch(pre_call=global_config)
    except KeyboardInterrupt:
        output.error("Process interrupted by user (KeyboardInterrupt)")
    except Exception as e:
        output.exception("%s\nSee log file for more details." % e)

    # cleanup output API and exit honoring output.error_occurred and
    # output.error_exit_code
    output.close_and_exit()