Example no. 1
def get_server_list(args=None, skip_disabled=False):
    """
    Get the server list from the configuration

    If the args parameter is None or args.server_name[0] is 'all',
    returns all defined servers

    :param args: an argparse namespace whose server_name attribute is
        a list of server names
    :param bool skip_disabled: skip disabled servers when 'all' is required
    """
    server_dict = {}
    if args is None or args.server_name[0] == 'all':
        for conf in barman.__config__.servers():
            server = Server(conf)
            if skip_disabled and not conf.active:
                output.info("Skipping disabled server '%s'",
                            conf.name)
                continue
            server_dict[conf.name] = server
    else:
        for server in args.server_name:
            conf = barman.__config__.get_server(server)
            if conf is None:
                server_dict[server] = None
            else:
                server_dict[server] = Server(conf)
    return server_dict
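
The branching above is a small selection pattern: 'all' (or no args) walks the
whole configuration, while explicit names are looked up one by one and unknown
names map to None. A standalone sketch of the same idea, using a hypothetical
in-memory configuration instead of barman's config machinery:

configured = {"main": {"active": True}, "spare": {"active": False}}

def select_servers(names=None, skip_disabled=False):
    # 'all' (or no selection) returns every configured server
    if names is None or names[0] == "all":
        return {name: conf for name, conf in configured.items()
                if not (skip_disabled and not conf["active"])}
    # explicit names: unknown ones map to None, as in get_server_list()
    return {name: configured.get(name) for name in names}

print(select_servers())                     # both servers
print(select_servers(skip_disabled=True))   # 'spare' is skipped
print(select_servers(["main", "missing"]))  # 'missing' maps to None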
Example no. 2
    def is_wal_relevant(self, wal_info, first_backup):
        """
        Check the relevance of a WAL file according to a provided BackupInfo
        (usually the oldest on the server) to ensure that the WAL is newer
        than the begin_wal of the backup.

        :param WalFileInfo wal_info: the WAL file we are checking
        :param BackupInfo first_backup: the backup used for the checks
            (usually the oldest available on the server)
        """

        # Skip history files
        if xlog.is_history_file(wal_info.name):
            return True

        # If the WAL file has a timeline smaller than the one of
        # the oldest backup it cannot be used in any way.
        wal_timeline = xlog.decode_segment_name(wal_info.name)[0]
        if wal_timeline < first_backup.timeline:
            output.info("\tThe timeline of the WAL file %s (%s), is lower "
                        "than the one of the oldest backup of "
                        "server %s (%s). Moving the WAL in "
                        "the error directory",
                        wal_info.name, wal_timeline, self.config.name,
                        first_backup.timeline)
            return False
        # Manage xlog segments older than the first backup
        if wal_info.name < first_backup.begin_wal:
            output.info("\tOlder than first backup of server %s. "
                        "Moving the WAL file %s in the error directory",
                        self.config.name, wal_info.name)
            return False
        return True
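
The two checks rely on the structure of WAL segment names: 24 hex digits
encoding timeline, log and segment, so names on the same timeline compare
chronologically as plain strings. A standalone sketch of what
xlog.decode_segment_name presumably extracts (the helper here is an
approximation, not barman's implementation):

def decode_segment_name(name):
    # TTTTTTTTXXXXXXXXYYYYYYYY: timeline, log, segment (hex)
    return int(name[0:8], 16), int(name[8:16], 16), int(name[16:24], 16)

wal_name = "000000020000000000000005"
begin_wal = "000000020000000000000003"
print(decode_segment_name(wal_name)[0])  # 2: the timeline
# Lexicographic comparison is chronological within a timeline, which is
# what makes `wal_info.name < first_backup.begin_wal` meaningful above
print(wal_name < begin_wal)  # False: this WAL is newer than begin_wal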
Example no. 3
def get_wal(args):
    """
    Retrieve WAL_NAME file from SERVER_NAME archive.
    The content will be streamed on standard output unless
    the --output-directory option is specified.
    """
    server = get_server(args, inactive_is_error=True)

    if getattr(args, 'test', None):
        output.info("Ready to retrieve WAL files from the server %s",
                    server.config.name)
        return

    # Retrieve optional arguments. If an argument is not specified,
    # the namespace doesn't contain it due to SUPPRESS default.
    # In that case we pick 'None' using getattr third argument.
    compression = getattr(args, 'compression', None)
    output_directory = getattr(args, 'output_directory', None)
    peek = getattr(args, 'peek', None)

    with closing(server):
        server.get_wal(args.wal_name,
                       compression=compression,
                       output_directory=output_directory,
                       peek=peek)
    output.close_and_exit()
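
The getattr() dance works because argparse drops an option from the namespace
entirely when its default is SUPPRESS. A minimal, self-contained demonstration
(the --peek option here is only illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--peek", type=int, default=argparse.SUPPRESS)
args = parser.parse_args([])

print(hasattr(args, "peek"))        # False: the attribute is absent
print(getattr(args, "peek", None))  # None, via getattr's third argument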
Example no. 4
    def _truncate_partial_file_if_needed(self, xlog_segment_size):
        """
        Truncate the .partial WAL file if its size is neither 0 nor
        xlog_segment_size

        :param int xlog_segment_size: the expected size of a WAL segment,
            in bytes
        """
        # Retrieve the list of .partial files (only one is expected)
        partial_files = glob(
            os.path.join(self.config.streaming_wals_directory, "*.partial"))

        # Take the last partial file, ignoring wrongly formatted file names
        last_partial = None
        for partial in partial_files:
            if not is_partial_file(partial):
                continue
            if not last_partial or partial > last_partial:
                last_partial = partial

        # Skip further work if there is no good partial file
        if not last_partial:
            return

        # If the size is either 0 or xlog_segment_size, everything is fine...
        partial_size = os.path.getsize(last_partial)
        if partial_size == 0 or partial_size == xlog_segment_size:
            return

        # otherwise truncate the file to be empty. This is safe because
        # pg_receivewal pads the file to the full size before it starts
        # writing.
        output.info("Truncating partial file %s that has wrong size %s "
                    "while %s was expected." %
                    (last_partial, partial_size, xlog_segment_size))
        open(last_partial, "wb").close()
Example no. 5
def exec_diagnose(servers, errors_list):
    """
    Diagnostic command: gathers information from the backup server
    and from all the configured servers.

    Gathered information should be used for support and problem detection

    :param dict(str,barman.server.Server) servers: dictionary of configured
        servers
    :param list errors_list: list of global errors
    """
    # global section. info about barman server
    diagnosis = {"global": {}, "servers": {}}
    # barman global config
    diagnosis["global"]["config"] = dict(barman.__config__._global_config)
    diagnosis["global"]["config"]["errors_list"] = errors_list
    try:
        command = fs.UnixLocalCommand()
        # basic system info
        diagnosis["global"]["system_info"] = command.get_system_info()
    except CommandFailedException as e:
        diagnosis["global"]["system_info"] = {"error": repr(e)}
    diagnosis["global"]["system_info"]["barman_ver"] = barman.__version__
    diagnosis["global"]["system_info"]["timestamp"] = datetime.datetime.now()
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis["servers"][name] = {}
        diagnosis["servers"][name]["config"] = vars(server.config)
        del diagnosis["servers"][name]["config"]["config"]
        # server system info
        if server.config.ssh_command:
            try:
                command = fs.UnixRemoteCommand(
                    ssh_command=server.config.ssh_command, path=server.path)
                diagnosis["servers"][name][
                    "system_info"] = command.get_system_info()
            except FsOperationFailed:
                pass
        # barman status information for the server
        diagnosis["servers"][name]["status"] = server.get_remote_status()
        # backup list
        backups = server.get_available_backups(BackupInfo.STATUS_ALL)
        diagnosis["servers"][name]["backups"] = backups
        # wal status
        diagnosis["servers"][name]["wals"] = {
            "last_archived_wal_per_timeline":
            server.backup_manager.get_latest_archived_wals_info(),
        }
        # Release any PostgreSQL resource
        server.close()
    output.info(json.dumps(diagnosis,
                           cls=BarmanEncoder,
                           indent=4,
                           sort_keys=True),
                log=False)
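
The timestamp added above is only JSON-serialisable because of the cls=
argument. A hypothetical stand-in for BarmanEncoder (the real class handles
more types; datetime is the one exercised here):

import datetime
import json

class DemoEncoder(json.JSONEncoder):
    def default(self, obj):
        # Serialise datetimes as ISO 8601 strings
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return json.JSONEncoder.default(self, obj)

doc = {"timestamp": datetime.datetime.now()}
print(json.dumps(doc, cls=DemoEncoder, indent=4, sort_keys=True))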
Example no. 6
    def receive_wal(self, reset=False):
        """
        Creates a PgReceiveXlog object and issues the pg_receivexlog command
        for a specific server

        :param bool reset: when set, resets the status of receive-wal
        :raise ArchiverFailure: when something goes wrong
        """
        # Ensure the presence of the destination directory
        mkpath(self.config.streaming_wals_directory)

        # Check if this is a reset request
        if reset:
            self._reset_streaming_status()
            return

        # Execute basic sanity checks on PostgreSQL connection
        postgres_status = self.server.streaming.get_remote_status()
        if postgres_status["streaming_supported"] is None:
            raise ArchiverFailure(
                'failed opening the PostgreSQL streaming connection')
        elif not postgres_status["streaming_supported"]:
            raise ArchiverFailure(
                'PostgreSQL version too old (%s < 9.2)' %
                self.server.streaming.server_txt_version)
        # Execute basic sanity checks on pg_receivexlog
        remote_status = self.get_remote_status()
        if not remote_status["pg_receivexlog_installed"]:
            raise ArchiverFailure(
                'pg_receivexlog not present in $PATH')
        if not remote_status['pg_receivexlog_compatible']:
            raise ArchiverFailure(
                'pg_receivexlog version not compatible with '
                'PostgreSQL server version')

        # Make sure we are not wasting precious PostgreSQL resources
        self.server.postgres.close()
        self.server.streaming.close()

        _logger.info('Activating WAL archiving through streaming protocol')
        try:
            output_handler = PgReceiveXlog.make_output_handler(
                self.config.name + ': ')
            receive = PgReceiveXlog(remote_status['pg_receivexlog_path'],
                                    self.config.streaming_conninfo,
                                    self.config.streaming_wals_directory,
                                    out_handler=output_handler,
                                    err_handler=output_handler)
            receive.execute()
        except CommandFailedException as e:
            _logger.error(e)
            raise ArchiverFailure("pg_receivexlog exited with an error. "
                                  "Check the logs for more information.")
        except KeyboardInterrupt:
            # This is a normal termination, so there is nothing to do besides
            # informing the user.
            output.info('SIGINT received. Terminate gracefully.')
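
make_output_handler() is not shown here; judging by its use, it returns a
callable that tags every line of pg_receivexlog output with the server name.
A minimal sketch under that assumption:

def make_output_handler(prefix):
    def handler(line):
        # Prefix each line of subprocess output with the server name
        print(prefix + line)
    return handler

handler = make_output_handler("main: ")
handler("starting log streaming at 0/3000000 (timeline 1)")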
Example no. 7
    def backup(self):
        """
        Performs a backup for the server
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = BackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info(
                "Starting backup for server %s in %s",
                self.config.name,
                backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(
                self, 'backup_retry_script', 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as DONE
            backup_info.set_attribute("status", "DONE")
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g. CTRL-C)
        except BaseException as e:
            msg_lines = str(e).strip().splitlines()
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", "FAILED")
                # If the exception has no attached message use the raw type name
                if len(msg_lines) == 0:
                    msg_lines = [type(e).__name__]
                backup_info.set_attribute(
                    "error",
                    "failure %s (%s)" % (
                        self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s\n%s",
                         self.executor.current_action, msg_lines[0],
                         '\n'.join(msg_lines[1:]))
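
The error branch keeps only the first line of the exception message for the
backup_info error field. A standalone demonstration of that split (the
message and action below are illustrative):

try:
    raise RuntimeError("connection failed\nretry limit reached")
except BaseException as e:
    msg_lines = str(e).strip().splitlines()
    # An empty message falls back to the exception type name
    if len(msg_lines) == 0:
        msg_lines = [type(e).__name__]
    print("failure copying files (%s)" % msg_lines[0])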
Example no. 8
def exec_diagnose(servers, errors_list):
    """
    Diagnostic command: gathers information from the backup server
    and from all the configured servers.

    Gathered information should be used for support and problem detection

    :param dict(str,barman.server.Server) servers: dictionary of configured
        servers
    :param list errors_list: list of global errors
    """
    # global section. info about barman server
    diagnosis = {'global': {}, 'servers': {}}
    # barman global config
    diagnosis['global']['config'] = dict(barman.__config__._global_config)
    diagnosis['global']['config']['errors_list'] = errors_list
    command = fs.UnixLocalCommand()
    # basic system info
    diagnosis['global']['system_info'] = command.get_system_info()
    diagnosis['global']['system_info']['barman_ver'] = barman.__version__
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis['servers'][name] = {}
        diagnosis['servers'][name]['config'] = vars(server.config)
        del diagnosis['servers'][name]['config']['config']
        # server system info
        if server.config.ssh_command:
            try:
                command = fs.UnixRemoteCommand(
                    ssh_command=server.config.ssh_command, path=server.path)
                diagnosis['servers'][name]['system_info'] = (
                    command.get_system_info())
            except FsOperationFailed:
                pass
        # barman status information for the server
        diagnosis['servers'][name]['status'] = server.get_remote_status()
        # backup list
        backups = server.get_available_backups(BackupInfo.STATUS_ALL)
        diagnosis['servers'][name]['backups'] = backups
        # wal status
        diagnosis['servers'][name]['wals'] = {
            'last_archived_wal_per_timeline':
            server.backup_manager.get_latest_archived_wals_info(),
        }
        # Release any PostgreSQL resource
        server.close()
    output.info(
        json.dumps(diagnosis,
                   cls=BarmanEncoder,
                   indent=4,
                   sort_keys=True))
Example no. 9
    def _reset_streaming_status(self):
        """
        Reset the status of receive-wal, removing any .partial files
        """
        output.info("Resetting receive-wal directory status")
        partial_files = glob(os.path.join(
            self.config.streaming_wals_directory, '*.partial'))
        for partial in partial_files:
            output.info("Removing status file %s" % partial)
            os.unlink(partial)
Example no. 10
    def _reset_streaming_status(self):
        """
        Reset the status of receive-wal, removing any .partial files
        """
        output.info("Resetting receive-wal directory status")
        partial_files = glob(
            os.path.join(self.config.streaming_wals_directory, '*.partial'))
        for partial in partial_files:
            output.info("Removing status file %s" % partial)
            os.unlink(partial)
Example no. 11
File: cli.py Project: stig/barman
def list_files(args):
    """
    List all the files for a single backup
    """
    server = get_server(args)

    # Retrieves the backup
    backup_info = parse_backup_id(server, args)
    for line in backup_info.get_list_of_files(args.target):
        output.info(line, log=False)
    output.close_and_exit()
Example no. 12
    def prepare_tablespaces(self, backup_info, cmd, dest, tablespaces):
        """
        Prepare the directory structure for required tablespaces,
        taking care of tablespaces relocation, if requested.

        :param barman.infofile.BackupInfo backup_info: backup representation
        :param barman.fs.UnixLocalCommand cmd: Object for
            filesystem interaction
        :param str dest: destination dir for the recovery
        :param dict tablespaces: dict of all the tablespaces and their location
        """
        tblspc_dir = os.path.join(dest, 'pg_tblspc')
        try:
            # check for the pg_tblspc dir in the recovery destination folder.
            # if it does not exist, create it
            cmd.create_dir_if_not_exists(tblspc_dir)
        except FsOperationFailed as e:
            output.exception(
                "unable to initialise tablespace directory "
                "'%s': %s", tblspc_dir, e)
            output.close_and_exit()
        for item in backup_info.tablespaces:

            # build the filename of the link under pg_tblspc directory
            pg_tblspc_file = os.path.join(tblspc_dir, str(item.oid))

            # by default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = item.location

            # if a relocation has been requested for this tablespace,
            # use the target directory provided by the user
            if tablespaces and item.name in tablespaces:
                location = tablespaces[item.name]

            try:
                # remove the current link in pg_tblspc, if it exists
                # (raise an exception if it is a directory)
                cmd.delete_if_exists(pg_tblspc_file)
                # create the tablespace location, if it does not exist
                # (raise an exception if it is not possible)
                cmd.create_dir_if_not_exists(location)
                # check for write permissions on destination directory
                cmd.check_write_permission(location)
                # create symlink between tablespace and recovery folder
                cmd.create_symbolic_link(location, pg_tblspc_file)
            except FsOperationFailed as e:
                output.exception(
                    "unable to prepare '%s' tablespace "
                    "(destination '%s'): %s", item.name, location, e)
                output.close_and_exit()
            output.info("\t%s, %s, %s", item.oid, item.name, location)
Example no. 13
    def prepare_tablespaces(self, backup_info, cmd, dest, tablespaces):
        """
        Prepare the directory structure for required tablespaces,
        taking care of tablespaces relocation, if requested.

        :param barman.infofile.BackupInfo backup_info: backup representation
        :param barman.fs.UnixLocalCommand cmd: Object for
            filesystem interaction
        :param str dest: destination dir for the recovery
        :param dict tablespaces: dict of all the tablespaces and their location
        """
        tblspc_dir = os.path.join(dest, 'pg_tblspc')
        try:
            # check for the pg_tblspc dir in the recovery destination folder.
            # if it does not exist, create it
            cmd.create_dir_if_not_exists(tblspc_dir)
        except FsOperationFailed as e:
            output.exception("unable to initialise tablespace directory "
                             "'%s': %s", tblspc_dir, e)
            output.close_and_exit()
        for item in backup_info.tablespaces:

            # build the filename of the link under pg_tblspc directory
            pg_tblspc_file = os.path.join(tblspc_dir, str(item.oid))

            # by default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = item.location

            # if a relocation has been requested for this tablespace,
            # use the target directory provided by the user
            if tablespaces and item.name in tablespaces:
                location = tablespaces[item.name]

            try:
                # remove the current link in pg_tblspc, if it exists
                # (raise an exception if it is a directory)
                cmd.delete_if_exists(pg_tblspc_file)
                # create the tablespace location, if it does not exist
                # (raise an exception if it is not possible)
                cmd.create_dir_if_not_exists(location)
                # check for write permissions on destination directory
                cmd.check_write_permission(location)
                # create symlink between tablespace and recovery folder
                cmd.create_symbolic_link(location, pg_tblspc_file)
            except FsOperationFailed as e:
                output.exception("unable to prepare '%s' tablespace "
                                 "(destination '%s'): %s",
                                 item.name, location, e)
                output.close_and_exit()
            output.info("\t%s, %s, %s", item.oid, item.name, location)
Example no. 14
    def backup_fsync_and_set_sizes(self, backup_info):
        """
        Fsync all files in a backup and set the actual size on disk
        of a backup.

        Also evaluate the deduplication ratio and the deduplicated size if
        applicable.

        :param barman.infofile.BackupInfo backup_info: the backup to update
        """
        # Calculate the base backup size
        self.executor.current_action = "calculating backup size"
        _logger.debug(self.executor.current_action)
        backup_size = 0
        deduplicated_size = 0
        backup_dest = backup_info.get_basebackup_directory()
        for dir_path, _, file_names in os.walk(backup_dest):
            # execute fsync() on the containing directory
            fsync_dir(dir_path)
            # execute fsync() on all the contained files
            for filename in file_names:
                file_path = os.path.join(dir_path, filename)
                file_fd = os.open(file_path, os.O_RDONLY)
                file_stat = os.fstat(file_fd)
                backup_size += file_stat.st_size
                # Excludes hard links from real backup size
                if file_stat.st_nlink == 1:
                    deduplicated_size += file_stat.st_size
                os.fsync(file_fd)
                os.close(file_fd)
        # Save size into BackupInfo object
        backup_info.set_attribute('size', backup_size)
        backup_info.set_attribute('deduplicated_size', deduplicated_size)
        if backup_info.size > 0:
            deduplication_ratio = 1 - (float(
                backup_info.deduplicated_size) / backup_info.size)
        else:
            deduplication_ratio = 0

        if self.config.reuse_backup == 'link':
            output.info(
                "Backup size: %s. Actual size on disk: %s"
                " (-%s deduplication ratio)." % (
                    pretty_size(backup_info.size),
                    pretty_size(backup_info.deduplicated_size),
                    '{percent:.2%}'.format(percent=deduplication_ratio)
                ))
        else:
            output.info("Backup size: %s" %
                        pretty_size(backup_info.size))
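
The sizing loop counts every file toward the logical backup size, but only
files with a single link (st_nlink == 1) toward the deduplicated size, since
reuse_backup == 'link' materialises unchanged files as hard links. A
standalone sketch:

import os
import tempfile

backup_dir = tempfile.mkdtemp()
original = os.path.join(backup_dir, "base.data")
with open(original, "wb") as f:
    f.write(b"x" * 4096)
os.link(original, os.path.join(backup_dir, "reused.data"))  # hard link

backup_size = deduplicated_size = 0
for dir_path, _, file_names in os.walk(backup_dir):
    for filename in file_names:
        fd = os.open(os.path.join(dir_path, filename), os.O_RDONLY)
        st = os.fstat(fd)
        backup_size += st.st_size
        if st.st_nlink == 1:  # exclude hard links from the real size
            deduplicated_size += st.st_size
        os.fsync(fd)
        os.close(fd)
print(backup_size, deduplicated_size)  # 8192 0: both names share one inode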
Example no. 16
    def delete_backup(self, backup):
        """
        Delete a backup

        :param backup: the backup to delete
        """
        available_backups = self.get_available_backups()
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            output.warning("Skipping delete of backup %s for server %s "
                           "due to minimum redundancy requirements "
                           "(minimum redundancy = %s, "
                           "current redundancy = %s)",
                           backup.backup_id,
                           self.config.name,
                           minimum_redundancy,
                           len(available_backups))
            return

        output.info("Deleting backup %s for server %s",
                    backup.backup_id, self.config.name)
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated with the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup
            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s\n"
                         "Please manually remove the '%s' directory",
                         backup.backup_id, self.config.name, e,
                         backup.get_basebackup_directory())
            return
        self.backup_cache_remove(backup)
        output.info("Done")
Example no. 17
def exec_diagnose(servers, errors_list):
    """
    Diagnostic command: gathers information from the backup server
    and from all the configured servers.

    Gathered information should be used for support and problem detection

    :param dict(str,barman.server.Server) servers: dictionary of configured
        servers
    :param list errors_list: list of global errors
    """
    # global section. info about barman server
    diagnosis = {}
    diagnosis['global'] = {}
    diagnosis['servers'] = {}
    # barman global config
    diagnosis['global']['config'] = dict(barman.__config__._global_config)
    diagnosis['global']['config']['errors_list'] = errors_list
    command = fs.UnixLocalCommand()
    # basic system info
    diagnosis['global']['system_info'] = command.get_system_info()
    diagnosis['global']['system_info']['barman_ver'] = barman.__version__
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis['servers'][name] = {}
        diagnosis['servers'][name]['config'] = vars(server.config)
        del diagnosis['servers'][name]['config']['config']
        # server system info
        if server.config.ssh_command:
            try:
                command = fs.UnixRemoteCommand(
                    ssh_command=server.config.ssh_command)
                diagnosis['servers'][name]['system_info'] = (
                    command.get_system_info())
            except FsOperationFailed:
                pass
        # barman status information for the server
        diagnosis['servers'][name]['status'] = server.get_remote_status()
        # backup list
        backups = server.get_available_backups(BackupInfo.STATUS_ALL)
        diagnosis['servers'][name]['backups'] = backups
        # Release any PostgreSQL resource
        server.close()
    output.info(json.dumps(diagnosis, cls=BarmanEncoder, indent=4,
                           sort_keys=True))
Example no. 18
    def cron_retention_policy(self):
        """
        Retention policy management
        """
        if (self.server.enforce_retention_policies
                and self.config.retention_policy_mode == 'auto'):
            available_backups = self.get_available_backups(
                BackupInfo.STATUS_ALL)
            retention_status = self.config.retention_policy.report()
            for bid in sorted(retention_status.keys()):
                if retention_status[bid] == BackupInfo.OBSOLETE:
                    output.info(
                        "Enforcing retention policy: removing backup %s for "
                        "server %s" % (bid, self.config.name))
                    self.delete_backup(available_backups[bid])
Example no. 19
def cron():
    """
    Run maintenance tasks
    """
    try:
        with lockfile.GlobalCronLock(barman.__config__.barman_lock_directory):
            servers = [Server(conf) for conf in barman.__config__.servers()
                       if conf.active]
            for server in servers:
                server.cron()
    except lockfile.LockFileBusy:
        output.info("Another cron is running")

    except lockfile.LockFilePermissionDenied as e:
        output.error("Permission denied, unable to access '%s'", e)
Example no. 20
    def cron_retention_policy(self):
        """
        Retention policy management
        """
        if (self.server.enforce_retention_policies and
                self.config.retention_policy_mode == 'auto'):
            available_backups = self.get_available_backups(
                BackupInfo.STATUS_ALL)
            retention_status = self.config.retention_policy.report()
            for bid in sorted(retention_status.keys()):
                if retention_status[bid] == BackupInfo.OBSOLETE:
                    output.info(
                        "Enforcing retention policy: removing backup %s for "
                        "server %s" % (bid, self.config.name))
                    self.delete_backup(available_backups[bid])
Example no. 21
    def delete_backup(self, backup):
        """
        Delete a backup

        :param backup: the backup to delete
        """
        available_backups = self.get_available_backups()
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            output.warning(
                "Skipping delete of backup %s for server %s "
                "due to minimum redundancy requirements "
                "(minimum redundancy = %s, "
                "current redundancy = %s)", backup.backup_id, self.config.name,
                minimum_redundancy, len(available_backups))
            return

        output.info("Deleting backup %s for server %s", backup.backup_id,
                    self.config.name)
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated with the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup
            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error(
                "Failure deleting backup %s for server %s.\n%s\n"
                "Please manually remove the '%s' directory", backup.backup_id,
                self.config.name, e, backup.get_basebackup_directory())
            return
        self.backup_cache_remove(backup)
        output.info("Done")
Example no. 22
    def _retrieve_safe_horizon(self, recovery_info, backup_info, dest):
        """
        Retrieve the safe_horizon for smart copy

        If the target directory contains a previous recovery, it is safe to
        pick the least of the two backup "begin times" (the one we are
        recovering now and the one previously recovered in the target
        directory). Set the value in the given recovery_info dictionary.

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: a backup representation
        :param str dest: recovery destination directory
        """
        # noinspection PyBroadException
        try:
            backup_begin_time = backup_info.begin_time
            # Retrieve previously recovered backup metadata (if available)
            dest_info_txt = recovery_info['cmd'].get_file_content(
                os.path.join(dest, '.barman-recover.info'))
            dest_info = BackupInfo(self.server,
                                   info_file=StringIO(dest_info_txt))
            dest_begin_time = dest_info.begin_time
            # Pick the earlier begin time. Both are tz-aware timestamps
            # because the BackupInfo class ensures it
            safe_horizon = min(backup_begin_time, dest_begin_time)
            output.info("Using safe horizon time for smart rsync copy: %s",
                        safe_horizon)
        except FsOperationFailed as e:
            # Setting safe_horizon to None will effectively disable
            # the time-based part of smart_copy method. However it is still
            # faster than running all the transfers with checksum enabled.
            #
            # FsOperationFailed means the .barman-recover.info is not available
            # on destination directory
            safe_horizon = None
            _logger.warning(
                'Unable to retrieve safe horizon time '
                'for smart rsync copy: %s', e)
        except Exception as e:
            # Same as above, but something failed decoding .barman-recover.info
            # or comparing times, so log the full traceback
            safe_horizon = None
            _logger.exception(
                'Error retrieving safe horizon time '
                'for smart rsync copy: %s', e)

        recovery_info['safe_horizon'] = safe_horizon
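
The min() at the heart of this method only works because both begin times
carry timezone information; comparing a naive and an aware datetime raises
TypeError. A small demonstration with made-up timestamps:

import datetime

utc = datetime.timezone.utc
backup_begin_time = datetime.datetime(2024, 5, 2, 12, 0, tzinfo=utc)
dest_begin_time = datetime.datetime(2024, 5, 1, 12, 0, tzinfo=utc)
# Safe because both operands are tz-aware
print(min(backup_begin_time, dest_begin_time))  # the earlier of the two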
Example no. 23
    def retrieve_safe_horizon(self, recovery_info, backup_info, dest):
        """
        Retrieve the safe_horizon for smart copy

        If the target directory contains a previous recovery, it is safe to
        pick the least of the two backup "begin times" (the one we are
        recovering now and the one previously recovered in the target
        directory). Set the value in the given recovery_info dictionary.

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: a backup representation
        :param str dest: recovery destination directory
        """
        # noinspection PyBroadException
        try:
            backup_begin_time = backup_info.begin_time
            # Retrieve previously recovered backup metadata (if available)
            dest_info_txt = recovery_info['cmd'].get_file_content(
                os.path.join(dest, '.barman-recover.info'))
            dest_info = BackupInfo(
                self.server,
                info_file=StringIO(dest_info_txt))
            dest_begin_time = dest_info.begin_time
            # Pick the earlier begin time. Both are tz-aware timestamps
            # because the BackupInfo class ensures it
            safe_horizon = min(backup_begin_time, dest_begin_time)
            output.info("Using safe horizon time for smart rsync copy: %s",
                        safe_horizon)
        except FsOperationFailed as e:
            # Setting safe_horizon to None will effectively disable
            # the time-based part of smart_copy method. However it is still
            # faster than running all the transfers with checksum enabled.
            #
            # FsOperationFailed means the .barman-recover.info is not available
            # on destination directory
            safe_horizon = None
            _logger.warning('Unable to retrieve safe horizon time '
                            'for smart rsync copy: %s', e)
        except Exception as e:
            # Same as above, but something failed decoding .barman-recover.info
            # or comparing times, so log the full traceback
            safe_horizon = None
            _logger.exception('Error retrieving safe horizon time '
                              'for smart rsync copy: %s', e)

        recovery_info['safe_horizon'] = safe_horizon
Example no. 24
    def recover(self, backup_info, dest, tablespaces, target_tli,
                target_time, target_xid, target_name,
                exclusive, remote_command):
        """
        Performs a recovery of a backup

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace name -> location map
            (for relocation)
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        """

        # Run the cron to be sure the wal catalog is up to date
        # Prepare a map that contains all the objects required for a recovery
        recovery_info = self.setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)

        # Set targets for PITR
        self.set_pitr_targets(recovery_info,
                              backup_info, dest,
                              target_name,
                              target_time,
                              target_tli,
                              target_xid)

        # Retrieve the safe_horizon for smart copy
        self.retrieve_safe_horizon(recovery_info, backup_info, dest)

        # check the destination directory; if it doesn't exist, create it
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.exception("unable to initialise destination directory "
                             "'%s': %s", dest, e)
            output.close_and_exit()
Example no. 25
def list_files(args):
    """
    List all the files for a single backup
    """
    server = get_server(args)
    if server is None:
        output.error("Unknown server '%s'", args.server_name)
        output.close_and_exit()
    # Retrieves the backup
    backup = parse_backup_id(server, args)
    if backup is None:
        output.error("Unknown backup '%s' for server '%s'", args.backup_id,
                     args.server_name)
        output.close_and_exit()
    for line in backup.get_list_of_files(args.target):
        output.info(line, log=False)
    output.close_and_exit()
Example no. 26
    def backup(self, backup_info):
        """
        Perform a backup for the server - invoked by BackupManager.backup()
        through the generic interface of a BackupExecutor. This implementation
        is responsible for performing a backup through a remote connection
        to the PostgreSQL server via SSH. The specific set of instructions
        depends on both the specific class that derives from SshBackupExecutor
        and the selected strategy (e.g. exclusive backup through Rsync).

        :param barman.infofile.BackupInfo backup_info: backup information
        """

        # Start the backup, all the subsequent code must be wrapped in a
        # try except block which finally issues a backup_stop command
        try:
            self.strategy.start_backup(backup_info)
        except BaseException:
            self._update_action_from_strategy()
            raise

        try:
            # save any metadata changed by start_backup() call
            # This must be inside the try-except, because it could fail
            backup_info.save()

            # If this is the first backup, purge unused WAL files
            previous_backup = self.backup_manager.get_previous_backup(
                backup_info.backup_id)
            if not previous_backup:
                self.backup_manager.remove_wal_before_backup(backup_info)

            output.info("Backup start at xlog location: %s (%s, %08X)",
                        backup_info.begin_xlog,
                        backup_info.begin_wal,
                        backup_info.begin_offset)

            # Start the copy
            self.current_action = "copying files"
            output.info("Copying files.")
            # perform the backup copy, honouring the retry option if set
            self.backup_manager.retry_backup_copy(self.backup_copy, backup_info)

            output.info("Copy done.")
        except:
            # we do not need to do anything here besides re-raising the
            # exception. It will be handled in the external try block.
            raise
        else:
            self.current_action = "issuing stop of the backup"
            output.info("Asking PostgreSQL server to finalize the backup.")
        finally:
            try:
                self.strategy.stop_backup(backup_info)
            except BaseException:
                self._update_action_from_strategy()
                raise
Example no. 27
def cron():
    """
    Run maintenance tasks
    """
    lockname = os.path.join(barman.__config__.barman_home, '.cron.lock')
    try:
        with lockfile.LockFile(lockname, raise_if_fail=True):
            servers = [Server(conf) for conf in barman.__config__.servers()]
            for server in servers:
                server.cron()
    except lockfile.LockFileBusy:
        output.info("Another cron is running")

    except lockfile.LockFilePermissionDenied:
        output.error("Permission denied, unable to access '%s'",
                     lockname)
    output.close_and_exit()
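
lockfile.LockFile itself is not shown in these excerpts. For intuition only,
a minimal Unix sketch of an exclusive, non-blocking lock file in the same
spirit (not barman's implementation):

import fcntl

class SimpleLockFile(object):
    def __init__(self, path):
        self.path = path

    def __enter__(self):
        self.fd = open(self.path, "w")
        # LOCK_NB makes this raise immediately if another process holds it
        fcntl.flock(self.fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        return self

    def __exit__(self, *exc):
        fcntl.flock(self.fd, fcntl.LOCK_UN)
        self.fd.close()

with SimpleLockFile("/tmp/.cron.lock"):
    pass  # maintenance tasks would run here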
Example no. 28
def list_files(args):
    """
    List all the files for a single backup
    """
    server = get_server(args)

    # Retrieves the backup
    backup_info = parse_backup_id(server, args)
    try:
        for line in backup_info.get_list_of_files(args.target):
            output.info(line, log=False)
    except BadXlogSegmentName as e:
        output.error(
            "invalid xlog segment name %r\n"
            "HINT: Please run \"barman rebuild-xlogdb %s\" "
            "to solve this issue", str(e), server.config.name)
        output.close_and_exit()
Example no. 29
def list_files(args):
    """
    List all the files for a single backup
    """
    server = get_server(args)

    # Retrieves the backup
    backup_info = parse_backup_id(server, args)
    try:
        for line in backup_info.get_list_of_files(args.target):
            output.info(line, log=False)
    except BadXlogSegmentName as e:
        output.error(
            "invalid xlog segment name %r\n"
            "HINT: Please run \"barman rebuild-xlogdb %s\" "
            "to solve this issue",
            str(e), server.config.name)
        output.close_and_exit()
Example no. 30
    def test_info_error(self, caplog):
        # preparation
        writer = self._mock_writer()

        msg = 'test message'
        output.info(msg, is_error=True)

        # logging test
        for record in caplog.records:
            assert record.levelname == 'INFO'
            assert record.name == __name__
        assert msg in caplog.text

        # writer test
        writer.error_occurred.assert_called_once_with()
        writer.info.assert_called_once_with(msg)

        # global status test
        assert output.error_occurred
Example no. 31
    def test_info_error(self, caplog):
        # preparation
        writer = self._mock_writer()

        msg = 'test message'
        output.info(msg, is_error=True)

        # logging test
        for record in caplog.records:
            assert record.levelname == 'INFO'
            assert record.name == __name__
        assert msg in caplog.text

        # writer test
        writer.error_occurred.assert_called_once_with()
        writer.info.assert_called_once_with(msg)

        # global status test
        assert output.error_occurred
Example no. 32
    def test_info(self, caplog):
        # preparation
        writer = self._mock_writer()

        msg = "test message"
        output.info(msg)

        # logging test
        for record in caplog.records:
            assert record.levelname == "INFO"
            assert record.name == __name__
        assert msg in caplog.text

        # writer test
        assert not writer.error_occurred.called
        writer.info.assert_called_once_with(msg)

        # global status test
        assert not output.error_occurred
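
The _mock_writer() helper is not shown in these excerpts; from its use, it
replaces the output writer with a Mock so the tests can assert on calls. A
self-contained sketch of that assertion pattern:

from unittest import mock

writer = mock.Mock()
writer.info("test message")  # what output.info() would forward

writer.info.assert_called_once_with("test message")
print(writer.error_occurred.called)  # False: no error was recorded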
Example no. 33
    def check_directories(self, check_strategy):
        """
        Checks backup directories and creates them if they do not exist

        :param CheckStrategy check_strategy: the strategy for the management
             of the results of the various checks
        """

        if self.config.disabled:
            check_strategy.result(self.config.name, 'directories', False)
            for conflict_paths in self.config.msg_list:
                output.info("\t%s" % conflict_paths)
        else:
            try:
                self._make_directories()
            except OSError as e:
                check_strategy.result(self.config.name, 'directories', False,
                                      "%s: %s" % (e.filename, e.strerror))
            else:
                check_strategy.result(self.config.name, 'directories', True)
Example no. 34
    def _reset_streaming_status(self, postgres_status, streaming_status):
        """
        Reset the status of receive-wal by removing the .partial file that
        marks the current position, and creating one aligned with the
        current PostgreSQL insert location
        """
        current_wal = xlog.location_to_xlogfile_name_offset(
            postgres_status["current_lsn"],
            streaming_status["timeline"],
            postgres_status["xlog_segment_size"],
        )["file_name"]
        restart_wal = current_wal
        if (postgres_status["replication_slot"]
                and postgres_status["replication_slot"].restart_lsn):
            restart_wal = xlog.location_to_xlogfile_name_offset(
                postgres_status["replication_slot"].restart_lsn,
                streaming_status["timeline"],
                postgres_status["xlog_segment_size"],
            )["file_name"]
        restart_path = os.path.join(self.config.streaming_wals_directory,
                                    restart_wal)
        restart_partial_path = restart_path + ".partial"
        wal_files = sorted(
            glob(os.path.join(self.config.streaming_wals_directory, "*")),
            reverse=True)

        # Pick the newest file
        last = None
        for last in wal_files:
            if xlog.is_wal_file(last) or xlog.is_partial_file(last):
                break

        # Check if the status is already up-to-date
        if not last or last == restart_partial_path or last == restart_path:
            output.info("Nothing to do. Position of receive-wal is aligned.")
            return

        if os.path.basename(last) > current_wal:
            output.error(
                "The receive-wal position is ahead of PostgreSQL "
                "current WAL lsn (%s > %s)",
                os.path.basename(last),
                postgres_status["current_lsn"],
            )
            return

        output.info("Resetting receive-wal directory status")
        if xlog.is_partial_file(last):
            output.info("Removing status file %s" % last)
            os.unlink(last)
        output.info("Creating status file %s" % restart_partial_path)
        open(restart_partial_path, "w").close()
Example no. 35
    def test_info_with_args(self, caplog):
        # preparation
        writer = self._mock_writer()

        msg = 'test format %02d %s'
        args = (1, '2nd')
        output.info(msg, *args)

        # logging test
        for record in caplog.records:
            assert record.levelname == 'INFO'
            assert record.name == __name__
        assert msg % args in caplog.text

        # writer test
        assert not writer.error_occurred.called
        writer.info.assert_called_once_with(msg, *args)

        # global status test
        assert not output.error_occurred
Example no. 36
def put_wal(args):
    """
    Receive a WAL file from SERVER_NAME and securely store it in the incoming
    directory. The file will be read from standard input in tar format.
    """
    server = get_server(args, inactive_is_error=True)

    if getattr(args, "test", None):
        output.info("Ready to accept WAL files for the server %s", server.config.name)
        return

    try:
        # Python 3.x
        stream = sys.stdin.buffer
    except AttributeError:
        # Python 2.x
        stream = sys.stdin
    with closing(server):
        server.put_wal(stream)
    output.close_and_exit()
Example no. 37
    def test_info_with_args(self, caplog):
        # preparation
        writer = self._mock_writer()

        msg = 'test format %02d %s'
        args = (1, '2nd')
        output.info(msg, *args)

        # logging test
        for record in caplog.records:
            assert record.levelname == 'INFO'
            assert record.name == __name__
        assert msg % args in caplog.text

        # writer test
        assert not writer.error_occurred.called
        writer.info.assert_called_once_with(msg, *args)

        # global status test
        assert not output.error_occurred
Example no. 38
def sync_info(args):
    """
    Output the internal synchronisation status.
    Used by sync_backup when synchronising with a passive node
    """
    server = get_server(args)
    try:
        # if called with --primary option
        if getattr(args, 'primary', False):
            primary_info = server.primary_node_info(args.last_wal,
                                                    args.last_position)
            output.info(json.dumps(primary_info, cls=BarmanEncoder, indent=4),
                        log=False)
        else:
            server.sync_status(args.last_wal, args.last_position)
    except SyncError as e:
        # Catch SyncError exceptions and output only the error message,
        # preventing the stack trace from being logged
        output.error(e)

    output.close_and_exit()
Example no. 39
def exec_diagnose(servers):
    """
    Diagnostic command: gathers information from the backup server
    and from all the configured servers.

    Gathered information should be used for support and problem detection

    :param servers: dictionary of configured servers
    """
    # global section. info about barman server
    diagnosis = {}
    diagnosis['global'] = {}
    diagnosis['servers'] = {}
    # barman global config
    diagnosis['global']['config'] = dict(barman.__config__._global_config)
    command = fs.UnixLocalCommand()
    # basic system info
    diagnosis['global']['system_info'] = command.get_system_info()
    diagnosis['global']['system_info']['barman_ver'] = barman.__version__
    # per server section
    for name in sorted(servers):
        server = servers[name]
        if server is None:
            output.error("Unknown server '%s'" % name)
            continue
        # server configuration
        diagnosis['servers'][name] = {}
        diagnosis['servers'][name]['config'] = vars(server.config)
        del diagnosis['servers'][name]['config']['config']
        # server system info
        command = fs.UnixRemoteCommand(ssh_command=server.config.ssh_command)
        diagnosis['servers'][name]['system_info'] = command.get_system_info()
        # barman status information for the server
        diagnosis['servers'][name]['status'] = server.get_remote_status()
        # backup list
        status_filter = BackupInfo.STATUS_NOT_EMPTY
        backups = server.get_available_backups(status_filter)
        diagnosis['servers'][name]['backups'] = backups
    output.info(json.dumps(diagnosis, cls=BarmanEncoder, indent=4,
                           sort_keys=True))
Example no. 40
    def create_backup_manifest(self):
        """
        Create the manifest file if it does not already exist.
        """

        if self.file_manager.file_exist(self._get_manifest_file_path()):
            msg = ("File %s already exists. Skip file creation." %
                   self._get_manifest_file_path())
            logging.info(msg)
            output.info(msg)
            return
        self._create_files_metadata()
        str_manifest = self._get_manifest_str()
        # The manifest string ends with ',' in place of the final '}', so
        # the checksum covers every file entry
        manifest_checksum = self.checksum_algorithm.checksum_from_str(
            str_manifest)
        last_line = '"Manifest-Checksum": "%s"}\n' % manifest_checksum
        full_manifest = str_manifest + last_line
        self.file_manager.save_content_to_file(self._get_manifest_file_path(),
                                               full_manifest.encode(),
                                               file_mode="wb")
Example no. 41
    def backup(self, backup_info):
        """
        Implementation of the BackupExecutor.backup(backup_info) method.
        Execute the copy of a backup from a remote server using rsync

        :param barman.infofile.BackupInfo backup_info: the object representing
            the backup.
        """

        # Start the backup; all the subsequent code must be wrapped in a
        # try/except block that finally issues a backup_stop command
        self.start_backup(backup_info)
        try:
            # save any metadata changed by start_backup() call
            # This must be inside the try-except, because it could fail
            backup_info.save()

            # If we are the first backup, purge unused WAL files
            previous_backup = self.backup_manager.get_previous_backup(
                backup_info.backup_id)
            if not previous_backup:
                self.backup_manager.remove_wal_before_backup(backup_info)

            output.info("Backup start at xlog location: %s (%s, %08X)",
                        backup_info.begin_xlog,
                        backup_info.begin_wal,
                        backup_info.begin_offset)

            # Start the copy
            self.current_action = "copying files"
            output.info("Copying files.")
            # perform the backup copy, honouring the retry option if set
            self.backup_manager.retry_backup_copy(self.backup_copy, backup_info)

            output.info("Copy done.")
        except BaseException:
            # we do not need to do anything here besides re-raising the
            # exception. It will be handled in the external try block.
            raise
        else:
            self.current_action = "issuing stop of the backup"
            output.info("Asking PostgreSQL server to finalize the backup.")
        finally:
            self.stop_backup(backup_info)

        if BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
            self.current_action = "writing backup label"
            self._write_backup_label(backup_info)
Example no. 42
File: cli.py Project: stig/barman
def cron():
    """
    Run maintenance tasks (global command)
    """
    try:
        with lockfile.GlobalCronLock(barman.__config__.barman_lock_directory):
            # Skip inactive and temporarily disabled servers
            servers = get_server_list(skip_inactive=True, skip_disabled=True)
            for name in sorted(servers):
                server = servers[name]

                # Exception: manage_server_command is not invoked here
                # Normally you would call manage_server_command to check if the
                # server is None and to report inactive and disabled servers,
                # but here we have only active and well configured servers.

                server.cron()

    except lockfile.LockFileBusy:
        output.info("Another cron is running")

    except lockfile.LockFilePermissionDenied as e:
        output.error("Permission denied, unable to access '%s'", e)
Example no. 43
    def _set_pitr_targets(self, recovery_info, backup_info, dest, target_name,
                          target_time, target_tli, target_xid,
                          target_immediate, target_action):
        """
        Set PITR targets - as specified by the user

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str dest: destination directory of the recovery
        :param str|None target_name: recovery target name for PITR
        :param str|None target_time: recovery target time for PITR
        :param str|None target_tli: recovery target timeline for PITR
        :param str|None target_xid: recovery target transaction id for PITR
        :param bool|None target_immediate: end recovery as soon as consistency
            is reached
        :param str|None target_action: recovery target action for PITR
        """
        target_epoch = None
        target_datetime = None
        d_immediate = backup_info.version >= 90400 and target_immediate
        d_tli = target_tli and target_tli != backup_info.timeline
        # Detect PITR
        if target_time or target_xid or d_tli or target_name or d_immediate:
            recovery_info['is_pitr'] = True
            targets = {}
            if target_time:
                try:
                    target_datetime = dateutil.parser.parse(target_time)
                except ValueError as e:
                    raise RecoveryInvalidTargetException(
                        "Unable to parse the target time parameter %r: %s" %
                        (target_time, e))
                except TypeError:
                    # this should not happen, but there is a known bug in
                    # dateutil.parser.parse() implementation
                    # ref: https://bugs.launchpad.net/dateutil/+bug/1247643
                    raise RecoveryInvalidTargetException(
                        "Unable to parse the target time parameter %r" %
                        target_time)

                # If the parsed timestamp is naive, force it to the
                # local timezone
                if target_datetime.tzinfo is None:
                    target_datetime = target_datetime.replace(
                        tzinfo=dateutil.tz.tzlocal())

                # Check if the target time is reachable from the
                # selected backup
                if backup_info.end_time > target_datetime:
                    raise RecoveryInvalidTargetException(
                        "The requested target time %s "
                        "is before the backup end time %s" %
                        (target_datetime, backup_info.end_time))

                ms = target_datetime.microsecond / 1000000.
                target_epoch = time.mktime(target_datetime.timetuple()) + ms
                targets['time'] = str(target_datetime)
            if target_xid:
                targets['xid'] = str(target_xid)
            if target_tli and target_tli != backup_info.timeline:
                targets['timeline'] = str(target_tli)
            if target_name:
                targets['name'] = str(target_name)
            if target_immediate:
                targets['immediate'] = target_immediate

            # Manage the target_action option
            if backup_info.version < 90100:
                if target_action:
                    raise RecoveryTargetActionException(
                        "Illegal target action '%s' "
                        "for this version of PostgreSQL" % target_action)
            elif 90100 <= backup_info.version < 90500:
                if target_action == 'pause':
                    recovery_info['pause_at_recovery_target'] = "on"
                elif target_action:
                    raise RecoveryTargetActionException(
                        "Illegal target action '%s' "
                        "for this version of PostgreSQL" % target_action)
            else:
                if target_action in ('pause', 'shutdown', 'promote'):
                    recovery_info['recovery_target_action'] = target_action
                elif target_action:
                    raise RecoveryTargetActionException(
                        "Illegal target action '%s' "
                        "for this version of PostgreSQL" % target_action)

            output.info(
                "Doing PITR. Recovery target %s",
                (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])))
            recovery_info['wal_dest'] = os.path.join(dest, 'barman_xlog')

            # With a PostgreSQL version older than 8.4, it is the user's
            # responsibility to delete the "barman_xlog" directory as the
            # restore_command option in recovery.conf is not supported
            if backup_info.version < 80400 and \
                    not recovery_info['get_wal']:
                recovery_info['results']['delete_barman_xlog'] = True
        else:
            if target_action:
                raise RecoveryTargetActionException(
                    "Can't enable recovery target action when PITR "
                    "is not required")

        recovery_info['target_epoch'] = target_epoch
        recovery_info['target_datetime'] = target_datetime
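
The epoch computation combines time.mktime (whole seconds, interpreted in local time) with the fractional microseconds. A standalone sketch of the parse-then-epoch steps, including the naive-timestamp fallback to the local timezone (the sample timestamp is arbitrary):

import time

import dateutil.parser
import dateutil.tz

target_time = '2024-05-01 12:30:00.250000'
target_datetime = dateutil.parser.parse(target_time)

# A naive timestamp is interpreted in the local timezone.
if target_datetime.tzinfo is None:
    target_datetime = target_datetime.replace(tzinfo=dateutil.tz.tzlocal())

# Whole seconds from the local time tuple, plus fractional microseconds.
ms = target_datetime.microsecond / 1000000.
target_epoch = time.mktime(target_datetime.timetuple()) + ms
print(target_datetime, target_epoch)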
Example no. 44
    def recover(self,
                backup_info,
                dest,
                tablespaces=None,
                remote_command=None,
                target_tli=None,
                target_time=None,
                target_xid=None,
                target_name=None,
                target_immediate=False,
                exclusive=False,
                target_action=None,
                standby_mode=None):
        """
        Performs a recovery of a backup

        This method should be called in a closing context

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool|None target_immediate: end recovery as soon as consistency
            is reached
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None target_action: The recovery target action
        :param bool|None standby_mode: standby mode
        """

        # Run the cron to be sure the wal catalog is up to date
        # Prepare a map that contains all the objects required for a recovery
        recovery_info = self._setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)
        if remote_command:
            output.info("Remote command: %s", remote_command)

        # If the backup we are recovering is still not validated and we
        # haven't requested the get-wal feature, display a warning message
        if not recovery_info['get_wal']:
            if backup_info.status == BackupInfo.WAITING_FOR_WALS:
                output.warning(
                    "IMPORTANT: You have requested a recovery operation for "
                    "a backup that does not have yet all the WAL files that "
                    "are required for consistency.")

        # Set targets for PITR
        self._set_pitr_targets(recovery_info, backup_info, dest, target_name,
                               target_time, target_tli, target_xid,
                               target_immediate, target_action)

        # Retrieve the safe_horizon for smart copy
        self._retrieve_safe_horizon(recovery_info, backup_info, dest)

        # Check the destination directory; create it if it doesn't exist
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.error(
                "unable to initialise destination directory "
                "'%s': %s", dest, e)
            output.close_and_exit()

        # Initialize tablespace directories
        if backup_info.tablespaces:
            self._prepare_tablespaces(backup_info, recovery_info['cmd'], dest,
                                      tablespaces)
        # Copy the base backup
        output.info("Copying the base backup.")
        try:
            self._backup_copy(backup_info, dest, tablespaces, remote_command,
                              recovery_info['safe_horizon'])
        except DataTransferFailure as e:
            output.error("Failure copying base backup: %s", e)
            output.close_and_exit()

        # Copy the backup.info file in the destination as
        # ".barman-recover.info"
        if remote_command:
            try:
                recovery_info['rsync'](backup_info.filename,
                                       ':%s/.barman-recover.info' % dest)
            except CommandFailedException as e:
                output.error('copy of recovery metadata file failed: %s', e)
                output.close_and_exit()
        else:
            backup_info.save(os.path.join(dest, '.barman-recover.info'))

        # Standby mode is not available for PostgreSQL older than 9.0
        if backup_info.version < 90000 and standby_mode:
            raise RecoveryStandbyModeException(
                'standby_mode is available only from PostgreSQL 9.0')

        # Restore the WAL segments. If GET_WAL option is set, skip this phase
        # as they will be retrieved using the get-wal command.
        if not recovery_info['get_wal']:
            # If the backup we restored is still waiting for WALS, read the
            # backup info again and check whether it has been validated.
            # Notify the user if it is still not DONE.
            if backup_info.status == BackupInfo.WAITING_FOR_WALS:
                data = BackupInfo(self.server, backup_info.filename)
                if data.status == BackupInfo.WAITING_FOR_WALS:
                    output.warning(
                        "IMPORTANT: The backup we have recovered IS NOT "
                        "VALID. Required WAL files for consistency are "
                        "missing. Please verify that WAL archiving is "
                        "working correctly or evaluate using the 'get-wal' "
                        "option for recovery")

            output.info("Copying required WAL segments.")

            try:
                # Retrieve a list of required log files
                required_xlog_files = tuple(
                    self.server.get_required_xlog_files(
                        backup_info, target_tli,
                        recovery_info['target_epoch']))

                # Restore WAL segments into the wal_dest directory
                self._xlog_copy(required_xlog_files, recovery_info['wal_dest'],
                                remote_command)
            except DataTransferFailure as e:
                output.error("Failure copying WAL files: %s", e)
                output.close_and_exit()
            except BadXlogSegmentName as e:
                output.error(
                    "invalid xlog segment name %r\n"
                    "HINT: Please run \"barman rebuild-xlogdb %s\" "
                    "to solve this issue", force_str(e), self.config.name)
                output.close_and_exit()
            # If WAL files are put directly in the pg_xlog directory,
            # avoid shipping of just recovered files
            # by creating the corresponding archive status file
            if not recovery_info['is_pitr']:
                output.info("Generating archive status files")
                self._generate_archive_status(recovery_info, remote_command,
                                              required_xlog_files)

        # Generate recovery.conf file (only if needed by PITR or get_wal)
        is_pitr = recovery_info['is_pitr']
        get_wal = recovery_info['get_wal']
        if is_pitr or get_wal or standby_mode:
            output.info("Generating recovery.conf")
            self._generate_recovery_conf(recovery_info, backup_info, dest,
                                         target_immediate, exclusive,
                                         remote_command, target_name,
                                         target_time, target_tli, target_xid,
                                         standby_mode)

        # Create archive_status directory if necessary
        archive_status_dir = os.path.join(recovery_info['wal_dest'],
                                          'archive_status')
        try:
            recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir)
        except FsOperationFailed as e:
            output.error(
                "unable to create the archive_status directory "
                "'%s': %s", archive_status_dir, e)
            output.close_and_exit()

        # As last step, analyse configuration files in order to spot
        # harmful options. Barman performs automatic conversion of
        # some options as well as notifying users of their existence.
        #
        # This operation is performed in three steps:
        # 1) mapping
        # 2) analysis
        # 3) copy
        output.info("Identify dangerous settings in destination directory.")

        self._map_temporary_config_files(recovery_info, backup_info,
                                         remote_command)
        self._analyse_temporary_config_files(recovery_info)
        self._copy_temporary_config_files(dest, remote_command, recovery_info)

        return recovery_info
Example no. 45
    def _set_pitr_targets(self, recovery_info, backup_info, dest, target_name,
                          target_time, target_tli, target_xid):
        """
        Set PITR targets - as specified by the user

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str dest: destination directory of the recovery
        :param str|None target_name: recovery target name for PITR
        :param str|None target_time: recovery target time for PITR
        :param str|None target_tli: recovery target timeline for PITR
        :param str|None target_xid: recovery target transaction id for PITR
        """
        target_epoch = None
        target_datetime = None
        if (target_time or target_xid
                or (target_tli and target_tli != backup_info.timeline)
                or target_name or recovery_info['get_wal']):
            recovery_info['is_pitr'] = True
            targets = {}
            if target_time:
                # noinspection PyBroadException
                try:
                    target_datetime = dateutil.parser.parse(target_time)
                except ValueError as e:
                    output.error(
                        "unable to parse the target time parameter %r: %s",
                        target_time, e)
                    self._teardown(recovery_info)
                    output.close_and_exit()
                except Exception:
                    # this should not happen, but there is a known bug in
                    # dateutil.parser.parse() implementation
                    # ref: https://bugs.launchpad.net/dateutil/+bug/1247643
                    output.error(
                        "unable to parse the target time parameter %r",
                        target_time)
                    output.close_and_exit()

                target_epoch = (time.mktime(target_datetime.timetuple()) +
                                (target_datetime.microsecond / 1000000.))
                targets['time'] = str(target_datetime)
            if target_xid:
                targets['xid'] = str(target_xid)
            if target_tli and target_tli != backup_info.timeline:
                targets['timeline'] = str(target_tli)
            if target_name:
                targets['name'] = str(target_name)
            output.info(
                "Doing PITR. Recovery target %s",
                (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])))
            recovery_info['wal_dest'] = os.path.join(dest, 'barman_xlog')

            # With a PostgreSQL version older than 8.4, it is the user's
            # responsibility to delete the "barman_xlog" directory as the
            # restore_command option in recovery.conf is not supported
            if backup_info.version < 80400 and \
                    not recovery_info['get_wal']:
                recovery_info['results']['delete_barman_xlog'] = True
        recovery_info['target_epoch'] = target_epoch
        recovery_info['target_datetime'] = target_datetime
Example no. 46
    def backup(self, wait=False, wait_timeout=None):
        """
        Performs a backup for the server

        :param bool wait: wait for all the required WAL files to be archived
        :param int|None wait_timeout: the time, in seconds, to wait for
            the required WAL files to be archived before timing out
        :return BackupInfo: the generated BackupInfo
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = LocalBackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.set_attribute('systemid', self.server.systemid)
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info("Starting backup using %s method for server %s in %s",
                        self.mode, self.config.name,
                        backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                                 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Create a restore point after a backup
            target_name = 'barman_%s' % backup_info.backup_id
            self.server.postgres.create_restore_point(target_name)

            # Free the Postgres connection
            self.server.postgres.close()

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as WAITING_FOR_WALS
            backup_info.set_attribute("status", BackupInfo.WAITING_FOR_WALS)
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g.: CTRL-C)
        except BaseException as e:
            msg_lines = force_str(e).strip().splitlines()
            # If the exception has no attached message use the raw
            # type name
            if len(msg_lines) == 0:
                msg_lines = [type(e).__name__]
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", BackupInfo.FAILED)
                backup_info.set_attribute(
                    "error", "failure %s (%s)" %
                    (self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s",
                         self.executor.current_action, '\n'.join(msg_lines))

        else:
            output.info("Backup end at LSN: %s (%s, %08X)",
                        backup_info.end_xlog, backup_info.end_wal,
                        backup_info.end_offset)

            executor = self.executor
            output.info(
                "Backup completed (start time: %s, elapsed time: %s)",
                self.executor.copy_start_time,
                human_readable_timedelta(datetime.datetime.now() -
                                         executor.copy_start_time))

            # If requested, wait for end_wal to be archived
            if wait:
                try:
                    self.server.wait_for_wal(backup_info.end_wal, wait_timeout)
                    self.check_backup(backup_info)
                except KeyboardInterrupt:
                    # Ignore CTRL-C pressed while waiting for WAL files
                    output.info(
                        "Got CTRL-C. Continuing without waiting for '%s' "
                        "to be archived", backup_info.end_wal)

        finally:
            if backup_info:
                backup_info.save()

                # Make sure we are not holding any PostgreSQL connection
                # during the post-backup scripts
                self.server.close()

                # Run the post-backup-retry-script if present.
                try:
                    retry_script = RetryHookScriptRunner(
                        self, 'backup_retry_script', 'post')
                    retry_script.env_from_backup_info(backup_info)
                    retry_script.run()
                except AbortedRetryHookScript as e:
                    # Ignore the ABORT_STOP as it is a post-hook operation
                    _logger.warning(
                        "Ignoring stop request after receiving "
                        "abort (exit code %d) from post-backup "
                        "retry hook script: %s", e.hook.exit_status,
                        e.hook.script)

                # Run the post-backup-script if present.
                script = HookScriptRunner(self, 'backup_script', 'post')
                script.env_from_backup_info(backup_info)
                script.run()

        output.result('backup', backup_info)
        return backup_info
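
The control flow of this method is the point: try runs the backup, except BaseException (so CTRL-C is caught too) records the failure, else reports success, and finally always saves metadata and runs the post hooks. A stripped-down skeleton of that flow (illustrative only, no barman API):

def run_backup(do_copy):
    status = None
    try:
        do_copy()
        status = 'WAITING_FOR_WALS'
    except BaseException as e:  # also catches KeyboardInterrupt
        status = 'FAILED'
        print("Backup failed: %s" % e)
    else:
        print("Backup completed")
    finally:
        # Post hooks and the metadata save run on success *and* failure.
        print("status saved: %s" % status)


def failing_copy():
    raise RuntimeError("copy error")


run_backup(lambda: None)
run_backup(failing_copy)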
Example no. 47
    def archive(self, fxlogdb, verbose=True):
        """
        Archive WAL files, discarding duplicates or those that are not valid.

        :param file fxlogdb: File object for xlogdb interactions
        :param boolean verbose: Flag for verbose output
        """
        compressor = self.backup_manager.compression_manager.get_compressor()
        stamp = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        found = False
        if verbose:
            output.info("Processing xlog segments from %s for %s",
                        self.name,
                        self.config.name,
                        log=False)
        batch = self.get_next_batch()
        for wal_info in batch:
            if not found and not verbose:
                output.info("Processing xlog segments from %s for %s",
                            self.name,
                            self.config.name,
                            log=False)
            found = True

            # Report to the user the WAL file we are archiving
            output.info("\t%s", wal_info.name, log=False)
            _logger.info("Archiving %s/%s", self.config.name, wal_info.name)
            # Archive the WAL file
            try:
                self.archive_wal(compressor, wal_info)
            except MatchingDuplicateWalFile:
                # We already have this file. Simply unlink the file.
                os.unlink(wal_info.orig_filename)
                continue
            except DuplicateWalFile:
                output.info("\tError: %s is already present in server %s. "
                            "File moved to errors directory.",
                            wal_info.name,
                            self.config.name)
                error_dst = os.path.join(
                    self.config.errors_directory,
                    "%s.%s.duplicate" % (wal_info.name,
                                         stamp))
                # TODO: cover corner case of duplication (unlikely,
                # but theoretically possible)
                shutil.move(wal_info.orig_filename, error_dst)
                continue
            except AbortedRetryHookScript as e:
                _logger.warning("Archiving of %s/%s aborted by "
                                "pre_archive_retry_script."
                                "Reason: %s" % (self.config.name,
                                                wal_info.name,
                                                e))
                return
            # Updates the information of the WAL archive with
            # the latest segments
            fxlogdb.write(wal_info.to_xlogdb_line())
            # flush and fsync for every line
            fxlogdb.flush()
            os.fsync(fxlogdb.fileno())
        if not found and verbose:
            output.info("\tno file found", log=False)
        if batch.errors:
            output.info("Some unknown objects have been found while "
                        "processing xlog segments for %s. "
                        "Objects moved to errors directory:",
                        self.config.name,
                        log=False)
            for error in batch.errors:
                output.info("\t%s", error)
                error_dst = os.path.join(
                    self.config.errors_directory,
                    "%s.%s.unknown" % (os.path.basename(error), stamp))
                shutil.move(error, error_dst)
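
Writing each xlogdb line and then flushing and fsyncing before archiving the next file guarantees the catalog never claims a WAL segment that a crash could lose. The same write-barrier pattern in isolation (path and line content are illustrative):

import os


def append_durably(path, line):
    """Append one line and force it to stable storage."""
    with open(path, 'a') as f:
        f.write(line)
        f.flush()             # push Python's buffer to the OS
        os.fsync(f.fileno())  # push the OS page cache to disk


append_durably('/tmp/demo-xlog.db',
               '000000010000000000000001\t16777216\tNone\tNone\n')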
Example no. 48
    def delete_backup(self, backup):
        """
        Delete a backup

        :param backup: the backup to delete
        """
        available_backups = self.get_available_backups()
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            output.warning(
                "Skipping delete of backup %s for server %s "
                "due to minimum redundancy requirements "
                "(minimum redundancy = %s, "
                "current redundancy = %s)", backup.backup_id, self.config.name,
                len(available_backups), minimum_redundancy)
            return
        # Keep track of when the delete operation started.
        delete_start_time = datetime.datetime.now()
        output.info("Deleting backup %s for server %s", backup.backup_id,
                    self.config.name)
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated to the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup

            timelines_to_protect = set()
            # If remove_until is not set there are no backups left
            if remove_until:
                # Retrieve the list of extra timelines that contain at least
                # one backup. On such timelines we don't want to delete any WAL
                for value in self.get_available_backups(
                        BackupInfo.STATUS_ARCHIVING).values():
                    # Ignore the backup that is being deleted
                    if value == backup:
                        continue
                    timelines_to_protect.add(value.timeline)
                # Remove the timeline of `remove_until` from the list.
                # We have enough information to safely delete unused WAL files
                # on it.
                timelines_to_protect -= set([remove_until.timeline])

            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until,
                                                      timelines_to_protect):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error(
                "Failure deleting backup %s for server %s.\n%s\n"
                "Please manually remove the '%s' directory", backup.backup_id,
                self.config.name, e, backup.get_basebackup_directory())
            return
        self.backup_cache_remove(backup)
        # Save the time of the complete removal of the backup
        delete_end_time = datetime.datetime.now()
        output.info(
            "Deleted backup %s (start time: %s, elapsed time: %s)",
            backup.backup_id, delete_start_time.ctime(),
            human_readable_timedelta(delete_end_time - delete_start_time))
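
The WAL-removal boundary above reduces to a small decision: keep the WALs needed by the next backup if one exists, keep WALs from the deleted backup's own start when concurrent backups are enabled, otherwise remove everything. A sketch of that rule as a pure function (names are illustrative, not barman API):

def wal_removal_boundary(deleted_backup, next_backup, concurrent):
    """Return the backup whose WALs must be kept, or None to remove all."""
    if next_backup is not None:
        return next_backup     # keep WALs needed by the next backup
    if concurrent:
        return deleted_backup  # keep WALs from the backup's own start
    return None                # no backups left: every WAL is removable


print(wal_removal_boundary('b1', 'b2', False))  # b2
print(wal_removal_boundary('b1', None, True))   # b1
print(wal_removal_boundary('b1', None, False))  # None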
Example no. 49
    def rebuild_xlogdb(self):
        """
        Rebuild the whole xlog database guessing it from the archive content.
        """
        from os.path import isdir, join

        output.info("Rebuilding xlogdb for server %s", self.config.name)
        root = self.config.wals_directory
        default_compression = self.config.compression
        wal_count = label_count = history_count = 0
        # lock the xlogdb as we are about to replace it completely
        with self.server.xlogdb('w') as fxlogdb:
            xlogdb_new = fxlogdb.name + ".new"
            with open(xlogdb_new, 'w') as fxlogdb_new:
                for name in sorted(os.listdir(root)):
                    # ignore the xlogdb and its lockfile
                    if name.startswith(self.server.XLOG_DB):
                        continue
                    fullname = join(root, name)
                    if isdir(fullname):
                        # all relevant files are in subdirectories
                        hash_dir = fullname
                        for wal_name in sorted(os.listdir(hash_dir)):
                            fullname = join(hash_dir, wal_name)
                            if isdir(fullname):
                                _logger.warning(
                                    'unexpected directory '
                                    'rebuilding the wal database: %s',
                                    fullname)
                            else:
                                if xlog.is_wal_file(fullname):
                                    wal_count += 1
                                elif xlog.is_backup_file(fullname):
                                    label_count += 1
                                else:
                                    _logger.warning(
                                        'unexpected file '
                                        'rebuilding the wal database: %s',
                                        fullname)
                                    continue
                                wal_info = WalFileInfo.from_file(
                                    fullname,
                                    default_compression=default_compression)
                                fxlogdb_new.write(wal_info.to_xlogdb_line())
                    else:
                        # only history files are here
                        if xlog.is_history_file(fullname):
                            history_count += 1
                            wal_info = WalFileInfo.from_file(
                                fullname,
                                default_compression=default_compression)
                            fxlogdb_new.write(wal_info.to_xlogdb_line())
                        else:
                            _logger.warning(
                                'unexpected file '
                                'rebuilding the wal database: %s',
                                fullname)
                os.fsync(fxlogdb_new.fileno())
            shutil.move(xlogdb_new, fxlogdb.name)
            fsync_dir(os.path.dirname(fxlogdb.name))
        output.info('Done rebuilding xlogdb for server %s '
                    '(history: %s, backup_labels: %s, wal_file: %s)',
                    self.config.name, history_count, label_count, wal_count)
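
The rebuild uses the classic atomic-replace recipe: write a '.new' file, fsync it, rename it over the old database, then fsync the containing directory so the rename itself survives a crash. The same recipe standalone (a sketch using os.replace; the path is illustrative):

import os


def atomic_rewrite(path, lines):
    """Replace path with new content; readers never see a partial file."""
    tmp = path + '.new'
    with open(tmp, 'w') as f:
        f.writelines(lines)
        f.flush()
        os.fsync(f.fileno())  # the data is durable before the rename
    os.replace(tmp, path)     # atomic on POSIX filesystems
    dir_fd = os.open(os.path.dirname(path) or '.', os.O_RDONLY)
    try:
        os.fsync(dir_fd)      # make the directory entry durable too
    finally:
        os.close(dir_fd)


atomic_rewrite('/tmp/demo-xlog.db', ['line 1\n', 'line 2\n'])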
Example no. 50
    def backup(self):
        """
        Performs a backup for the server
        """
        _logger.debug("initialising backup information")
        self.executor.init()
        backup_info = None
        try:
            # Create the BackupInfo object representing the backup
            backup_info = BackupInfo(
                self.server,
                backup_id=datetime.datetime.now().strftime('%Y%m%dT%H%M%S'))
            backup_info.save()
            self.backup_cache_add(backup_info)
            output.info("Starting backup using %s method for server %s in %s",
                        self.mode, self.config.name,
                        backup_info.get_basebackup_directory())

            # Run the pre-backup-script if present.
            script = HookScriptRunner(self, 'backup_script', 'pre')
            script.env_from_backup_info(backup_info)
            script.run()

            # Run the pre-backup-retry-script if present.
            retry_script = RetryHookScriptRunner(self, 'backup_retry_script',
                                                 'pre')
            retry_script.env_from_backup_info(backup_info)
            retry_script.run()

            # Do the backup using the BackupExecutor
            self.executor.backup(backup_info)

            # Compute backup size and fsync it on disk
            self.backup_fsync_and_set_sizes(backup_info)

            # Mark the backup as DONE
            backup_info.set_attribute("status", "DONE")
        # Use BaseException instead of Exception to catch events like
        # KeyboardInterrupt (e.g.: CTRL-C)
        except BaseException as e:
            msg_lines = str(e).strip().splitlines()
            if backup_info:
                # Use only the first line of exception message
                # in backup_info error field
                backup_info.set_attribute("status", "FAILED")
                # If the exception has no attached message use the raw
                # type name
                if len(msg_lines) == 0:
                    msg_lines = [type(e).__name__]
                backup_info.set_attribute(
                    "error", "failure %s (%s)" %
                    (self.executor.current_action, msg_lines[0]))

            output.error("Backup failed %s.\nDETAILS: %s\n%s",
                         self.executor.current_action, msg_lines[0],
                         '\n'.join(msg_lines[1:]))

        else:
            output.info("Backup end at LSN: %s (%s, %08X)",
                        backup_info.end_xlog, backup_info.end_wal,
                        backup_info.end_offset)
            output.info(
                "Backup completed (start time: %s, elapsed time: %s)",
                self.executor.copy_start_time,
                human_readable_timedelta(self.executor.copy_end_time -
                                         self.executor.copy_start_time))
            # Create a restore point after a backup
            target_name = 'barman_%s' % backup_info.backup_id
            self.server.postgres.create_restore_point(target_name)
        finally:
            if backup_info:
                backup_info.save()

                # Make sure we are not holding any PostgreSQL connection
                # during the post-backup scripts
                self.server.close()

                # Run the post-backup-retry-script if present.
                try:
                    retry_script = RetryHookScriptRunner(
                        self, 'backup_retry_script', 'post')
                    retry_script.env_from_backup_info(backup_info)
                    retry_script.run()
                except AbortedRetryHookScript as e:
                    # Ignore the ABORT_STOP as it is a post-hook operation
                    _logger.warning(
                        "Ignoring stop request after receiving "
                        "abort (exit code %d) from post-backup "
                        "retry hook script: %s", e.hook.exit_status,
                        e.hook.script)

                # Run the post-backup-script if present.
                script = HookScriptRunner(self, 'backup_script', 'post')
                script.env_from_backup_info(backup_info)
                script.run()

        output.result('backup', backup_info)
Example no. 51
    def archive_wal(self, verbose=True):
        """
        Executes WAL maintenance operations, such as archiving and compression

        If verbose is set to False, outputs something only if there is
        at least one file

        :param bool verbose: produce output even when there is nothing to do
        """
        found = False
        compressor = self.compression_manager.get_compressor()
        with self.server.xlogdb('a') as fxlogdb:
            if verbose:
                output.info("Processing xlog segments for %s",
                            self.config.name,
                            log=False)
            # Get the first available backup
            first_backup_id = self.get_first_backup(BackupInfo.STATUS_NOT_EMPTY)
            first_backup = self.server.get_backup(first_backup_id)
            for filename in sorted(glob(
                    os.path.join(self.config.incoming_wals_directory, '*'))):
                if not found and not verbose:
                    output.info("Processing xlog segments for %s",
                                self.config.name,
                                log=False)
                found = True

                # Create WAL Info object
                wal_info = WalFileInfo.from_file(filename, compression=None)

                # If there are no available backups ...
                if first_backup is None:
                    # ... delete xlog segments only for exclusive backups
                    if BackupOptions.CONCURRENT_BACKUP \
                            not in self.config.backup_options:
                        # Skipping history files
                        if not xlog.is_history_file(filename):
                            output.info("\tNo base backup available."
                                        " Trashing file %s"
                                        " from server %s",
                                        wal_info.name, self.config.name)
                            os.unlink(filename)
                            continue
                # ... otherwise
                else:
                    # ... delete xlog segments older than the first backup
                    if wal_info.name < first_backup.begin_wal:
                        # Skipping history files
                        if not xlog.is_history_file(filename):
                            output.info("\tOlder than first backup."
                                        " Trashing file %s"
                                        " from server %s",
                                        wal_info.name, self.config.name)
                            os.unlink(filename)
                            continue

                # Report to the user the WAL file we are archiving
                output.info("\t%s", os.path.basename(filename), log=False)
                _logger.info("Archiving %s/%s",
                             self.config.name,
                             os.path.basename(filename))
                # Archive the WAL file
                try:
                    self.cron_wal_archival(compressor, wal_info)
                except AbortedRetryHookScript as e:
                    _logger.warning("Archiving of %s/%s aborted by "
                                    "pre_archive_retry_script."
                                    "Reason: %s" % (self.config.name,
                                                    os.path.basename(),
                                                    e))
                    return
                # Updates the information of the WAL archive with
                # the latest segments
                fxlogdb.write(wal_info.to_xlogdb_line())
                # flush and fsync for every line
                fxlogdb.flush()
                os.fsync(fxlogdb.fileno())
        if not found and verbose:
            output.info("\tno file found", log=False)
Example no. 52
    def delete_backup(self, backup):
        """
        Delete a backup

        :param backup: the backup to delete
        :return bool: True if deleted, False if could not delete the backup
        """
        available_backups = self.get_available_backups(
            status_filter=(BackupInfo.DONE, ))
        minimum_redundancy = self.server.config.minimum_redundancy
        # Honour minimum required redundancy
        if backup.status == BackupInfo.DONE and \
                minimum_redundancy >= len(available_backups):
            output.warning(
                "Skipping delete of backup %s for server %s "
                "due to minimum redundancy requirements "
                "(minimum redundancy = %s, "
                "current redundancy = %s)", backup.backup_id, self.config.name,
                minimum_redundancy, len(available_backups))
            return False
        # Keep track of when the delete operation started.
        delete_start_time = datetime.datetime.now()

        # Run the pre_delete_script if present.
        script = HookScriptRunner(self, 'delete_script', 'pre')
        script.env_from_backup_info(backup)
        script.run()

        # Run the pre_delete_retry_script if present.
        retry_script = RetryHookScriptRunner(self, 'delete_retry_script',
                                             'pre')
        retry_script.env_from_backup_info(backup)
        retry_script.run()

        output.info("Deleting backup %s for server %s", backup.backup_id,
                    self.config.name)
        previous_backup = self.get_previous_backup(backup.backup_id)
        next_backup = self.get_next_backup(backup.backup_id)
        # Delete all the data contained in the backup
        try:
            self.delete_backup_data(backup)
        except OSError as e:
            output.error("Failure deleting backup %s for server %s.\n%s",
                         backup.backup_id, self.config.name, e)
            return False
        # Check if we are deleting the first available backup
        if not previous_backup:
            # In the case of exclusive backup (default), removes any WAL
            # files associated to the backup being deleted.
            # In the case of concurrent backup, removes only WAL files
            # prior to the start of the backup being deleted, as they
            # might be useful to any concurrent backup started immediately
            # after.
            remove_until = None  # means to remove all WAL files
            if next_backup:
                remove_until = next_backup
            elif BackupOptions.CONCURRENT_BACKUP in self.config.backup_options:
                remove_until = backup

            timelines_to_protect = set()
            # If remove_until is not set there are no backups left
            if remove_until:
                # Retrieve the list of extra timelines that contain at least
                # one backup. On such timelines we don't want to delete any WAL
                for value in self.get_available_backups(
                        BackupInfo.STATUS_ARCHIVING).values():
                    # Ignore the backup that is being deleted
                    if value == backup:
                        continue
                    timelines_to_protect.add(value.timeline)
                # Remove the timeline of `remove_until` from the list.
                # We have enough information to safely delete unused WAL files
                # on it.
                timelines_to_protect -= set([remove_until.timeline])

            output.info("Delete associated WAL segments:")
            for name in self.remove_wal_before_backup(remove_until,
                                                      timelines_to_protect):
                output.info("\t%s", name)
        # As last action, remove the backup directory,
        # ending the delete operation
        try:
            self.delete_basebackup(backup)
        except OSError as e:
            output.error(
                "Failure deleting backup %s for server %s.\n%s\n"
                "Please manually remove the '%s' directory", backup.backup_id,
                self.config.name, e, backup.get_basebackup_directory())
            return False
        self.backup_cache_remove(backup)
        # Save the time of the complete removal of the backup
        delete_end_time = datetime.datetime.now()
        output.info(
            "Deleted backup %s (start time: %s, elapsed time: %s)",
            backup.backup_id, delete_start_time.ctime(),
            human_readable_timedelta(delete_end_time - delete_start_time))

        # Remove the sync lockfile if exists
        sync_lock = ServerBackupSyncLock(self.config.barman_lock_directory,
                                         self.config.name, backup.backup_id)
        if os.path.exists(sync_lock.filename):
            _logger.debug("Deleting backup sync lockfile: %s" %
                          sync_lock.filename)

            os.unlink(sync_lock.filename)

        # Run the post_delete_retry_script if present.
        try:
            retry_script = RetryHookScriptRunner(self, 'delete_retry_script',
                                                 'post')
            retry_script.env_from_backup_info(backup)
            retry_script.run()
        except AbortedRetryHookScript as e:
            # Ignore the ABORT_STOP as it is a post-hook operation
            _logger.warning(
                "Ignoring stop request after receiving "
                "abort (exit code %d) from post-delete "
                "retry hook script: %s", e.hook.exit_status, e.hook.script)

        # Run the post_delete_script if present.
        script = HookScriptRunner(self, 'delete_script', 'post')
        script.env_from_backup_info(backup)
        script.run()

        return True
Example no. 53
def get_server_list(args=None,
                    skip_inactive=False,
                    skip_disabled=False,
                    on_error_stop=True,
                    suppress_error=False):
    """
    Get the server list from the configuration

    If the args parameter is None or args.server_name is ['all']
    returns all defined servers

    :param args: an argparse namespace containing a list server_name parameter
    :param bool skip_inactive: skip inactive servers when 'all' is required
    :param bool skip_disabled: skip disabled servers when 'all' is required
    :param bool on_error_stop: stop if an error is found
    :param bool suppress_error: suppress display of errors (e.g. diagnose)
    :rtype: dict(str,barman.server.Server|None)
    """
    server_dict = {}

    # This function must be called within a multiple-server context
    assert not args or isinstance(args.server_name, list)

    # Generate the list of servers (required for global errors)
    available_servers = barman.__config__.server_names()

    # Get a list of configuration errors from all the servers
    global_error_list = barman.__config__.servers_msg_list

    # Global errors have higher priority
    if global_error_list:
        # Output the list of global errors
        if not suppress_error:
            for error in global_error_list:
                output.error(error)

        # If requested, exit on first error
        if on_error_stop:
            output.close_and_exit()
            # The following return statement will never be reached
            # but it is here for clarity
            return {}

    # Handle special 'all' server cases
    # - args is None
    # - 'all' special name
    if not args or 'all' in args.server_name:
        # When 'all' is used, it must be the only specified argument
        if args and len(args.server_name) != 1:
            output.error("You cannot use 'all' with other server names")
        servers = available_servers
    else:
        servers = args.server_name

    # Loop through all the requested servers
    for server in servers:
        conf = barman.__config__.get_server(server)
        if conf is None:
            # Unknown server
            server_dict[server] = None
        else:
            server_object = Server(conf)
            # Skip inactive servers, if requested
            if skip_inactive and not server_object.config.active:
                output.info("Skipping inactive server '%s'" % conf.name)
                continue
            # Skip disabled servers, if requested
            if skip_disabled and server_object.config.disabled:
                output.info("Skipping temporarily disabled server '%s'" %
                            conf.name)
                continue
            server_dict[server] = server_object

    return server_dict
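
A usage sketch (assumes a configured barman installation; Namespace stands in for the parsed command line):

from argparse import Namespace

# None or ['all'] selects every configured server.
servers = get_server_list(Namespace(server_name=['all']),
                          skip_inactive=True,
                          skip_disabled=True)
for name in sorted(servers):
    if servers[name] is None:
        print("unknown server: %s" % name)
    else:
        print("ok: %s" % name)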
Example no. 54
    def set_pitr_targets(self, recovery_info, backup_info, dest, target_name,
                         target_time, target_tli, target_xid):
        """
        Set PITR targets - as specified by the user

        :param dict recovery_info: Dictionary containing all the recovery
            parameters
        :param barman.infofile.BackupInfo backup_info: representation of a
            backup
        :param str dest: destination directory of the recovery
        :param str|None target_name: recovery target name for PITR
        :param str|None target_time: recovery target time for PITR
        :param str|None target_tli: recovery target timeline for PITR
        :param str|None target_xid: recovery target transaction id for PITR
        """
        target_epoch = None
        target_datetime = None
        if (target_time or
                target_xid or
                (target_tli and target_tli != backup_info.timeline) or
                target_name or
                recovery_info['get_wal']):
            recovery_info['is_pitr'] = True
            targets = {}
            if target_time:
                # noinspection PyBroadException
                try:
                    target_datetime = dateutil.parser.parse(target_time)
                except ValueError as e:
                    output.exception(
                        "unable to parse the target time parameter %r: %s",
                        target_time, e)
                    output.close_and_exit()
                except Exception:
                    # this should not happen, but there is a known bug in
                    # dateutil.parser.parse() implementation
                    # ref: https://bugs.launchpad.net/dateutil/+bug/1247643
                    output.exception(
                        "unable to parse the target time parameter %r",
                        target_time)
                    output.close_and_exit()

                target_epoch = (
                    time.mktime(target_datetime.timetuple()) +
                    (target_datetime.microsecond / 1000000.))
                targets['time'] = str(target_datetime)
            if target_xid:
                targets['xid'] = str(target_xid)
            if target_tli and target_tli != backup_info.timeline:
                targets['timeline'] = str(target_tli)
            if target_name:
                targets['name'] = str(target_name)
            output.info(
                "Doing PITR. Recovery target %s",
                (", ".join(["%s: %r" % (k, v) for k, v in targets.items()])))
            recovery_info['wal_dest'] = os.path.join(dest, 'barman_xlog')

            # With a PostgreSQL version older than 8.4, it is the user's
            # responsibility to delete the "barman_xlog" directory as the
            # restore_command option in recovery.conf is not supported
            if backup_info.version < 80400 and \
                    not recovery_info['get_wal']:
                recovery_info['results']['delete_barman_xlog'] = True
        recovery_info['target_epoch'] = target_epoch
        recovery_info['target_datetime'] = target_datetime
Example no. 55
    def recover(self, backup_info, dest, tablespaces, target_tli,
                target_time, target_xid, target_name,
                exclusive, remote_command):
        """
        Performs a recovery of a backup

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        """

        # Prepare a map that contains all the objects required for a recovery
        recovery_info = self.setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)

        # Set targets for PITR
        self.set_pitr_targets(recovery_info,
                              backup_info, dest,
                              target_name,
                              target_time,
                              target_tli,
                              target_xid)

        # Retrieve the safe_horizon for smart copy
        self.retrieve_safe_horizon(recovery_info, backup_info, dest)

        # Check the destination directory. If it doesn't exist, create it
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.exception("unable to initialise destination directory "
                             "'%s': %s", dest, e)
            output.close_and_exit()

        # Initialize tablespace directories
        if backup_info.tablespaces:
            self.prepare_tablespaces(backup_info,
                                     recovery_info['cmd'],
                                     dest,
                                     tablespaces)
        # Copy the base backup
        output.info("Copying the base backup.")
        try:
            # perform the backup copy, honoring the retry option if set
            self.backup_manager.retry_backup_copy(
                self.basebackup_copy,
                backup_info, dest,
                tablespaces, remote_command,
                recovery_info['safe_horizon'])
        except DataTransferFailure as e:
            output.exception("Failure copying base backup: %s", e)
            output.close_and_exit()

        # Copy the backup.info file in the destination as
        # ".barman-recover.info"
        if remote_command:
            try:
                recovery_info['rsync'](backup_info.filename,
                                       ':%s/.barman-recover.info' % dest)
            except CommandFailedException as e:
                output.exception(
                    'copy of recovery metadata file failed: %s', e)
                output.close_and_exit()
        else:
            backup_info.save(os.path.join(dest, '.barman-recover.info'))

        # Restore the WAL segments. If GET_WAL option is set, skip this phase
        # as they will be retrieved using the get-wal command.
        if not recovery_info['get_wal']:
            output.info("Copying required WAL segments.")

            try:
                # Retrieve a list of required log files
                required_xlog_files = tuple(
                    self.server.get_required_xlog_files(
                        backup_info, target_tli,
                        recovery_info['target_epoch']))

                # Restore WAL segments into the wal_dest directory
                self.xlog_copy(required_xlog_files,
                               recovery_info['wal_dest'],
                               remote_command)
            except DataTransferFailure as e:
                output.exception("Failure copying WAL files: %s", e)
                output.close_and_exit()
            except xlog.BadXlogSegmentName as e:
                output.error(
                    "invalid xlog segment name %r\n"
                    "HINT: Please run \"barman rebuild-xlogdb %s\" "
                    "to solve this issue",
                    str(e), self.config.name)
                output.close_and_exit()
            # If WAL files are put directly in the pg_xlog directory,
            # avoid shipping of just recovered files
            # by creating the corresponding archive status file
            if not recovery_info['is_pitr']:
                output.info("Generating archive status files")
                self.generate_archive_status(recovery_info,
                                             remote_command,
                                             required_xlog_files)

        # Generate recovery.conf file (only if needed by PITR)
        if recovery_info['is_pitr']:
            output.info("Generating recovery.conf")
            self.generate_recovery_conf(recovery_info, backup_info, dest,
                                        exclusive, remote_command, target_name,
                                        target_time, target_tli, target_xid)

        # Create archive_status directory if necessary
        archive_status_dir = os.path.join(dest, 'pg_xlog', 'archive_status')
        try:
            recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir)
        except FsOperationFailed as e:
            output.exception("unable to create the archive_status directory "
                             "'%s': %s", archive_status_dir, e)
            output.close_and_exit()

        # As last step, analyse configuration files in order to spot
        # harmful options. Barman performs automatic conversion of
        # some options as well as notifying users of their existence.
        #
        # This operation is performed in three steps:
        # 1) mapping
        # 2) analysis
        # 3) copy
        output.info("Identify dangerous settings in destination directory.")

        self.map_temporary_config_files(recovery_info,
                                        backup_info,
                                        remote_command)
        self.analyse_temporary_config_files(recovery_info)
        self.copy_temporary_config_files(dest,
                                         remote_command,
                                         recovery_info)

        # Cleanup operations
        self.teardown(recovery_info)

        return recovery_info
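
The PITR branch taken at the end of recover() is driven by set_pitr_targets() in the previous example; a condensed, runnable sketch of that decision (needs_pitr is a hypothetical helper, not a barman function):

def needs_pitr(get_wal, backup_timeline, target_time=None, target_xid=None,
               target_tli=None, target_name=None):
    # PITR is needed when any explicit target is given, when the requested
    # timeline differs from the backup's, or when get_wal is enabled
    return bool(target_time or target_xid or
                (target_tli and target_tli != backup_timeline) or
                target_name or get_wal)

print(needs_pitr(False, 1, target_xid='12345'))  # True
print(needs_pitr(False, 1, target_tli=1))        # False: same timeline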
Example no. 56
    def receive_wal(self, reset=False):
        """
        Creates a PgReceiveXlog object and issues the pg_receivexlog command
        for a specific server

        :param bool reset: When set reset the status of receive-wal
        :raise ArchiverFailure: when something goes wrong
        """
        # Ensure the presence of the destination directory
        mkpath(self.config.streaming_wals_directory)

        # Check whether this is a reset request
        if reset:
            self._reset_streaming_status()
            return

        # Execute basic sanity checks on PostgreSQL connection
        streaming_status = self.server.streaming.get_remote_status()
        if streaming_status["streaming_supported"] is None:
            raise ArchiverFailure(
                'failed opening the PostgreSQL streaming connection '
                'for server %s' % (self.config.name))
        elif not streaming_status["streaming_supported"]:
            raise ArchiverFailure('PostgreSQL version too old (%s < 9.2)' %
                                  self.server.streaming.server_txt_version)
        # Execute basic sanity checks on pg_receivexlog
        remote_status = self.get_remote_status()
        if not remote_status["pg_receivexlog_installed"]:
            raise ArchiverFailure('pg_receivexlog not present in $PATH')
        if not remote_status['pg_receivexlog_compatible']:
            raise ArchiverFailure('pg_receivexlog version not compatible with '
                                  'PostgreSQL server version')

        # Execute sanity check on replication slot usage
        if self.config.slot_name:
            # Check if slots are supported
            if not remote_status['pg_receivexlog_supports_slots']:
                raise ArchiverFailure(
                    'Physical replication slot not supported by %s '
                    '(9.4 or higher is required)' %
                    self.server.streaming.server_txt_version)
            # Check if the required slot exists
            postgres_status = self.server.postgres.get_remote_status()
            if postgres_status['replication_slot'] is None:
                raise ArchiverFailure(
                    "replication slot '%s' doesn't exist. "
                    "Please execute "
                    "'barman receive-wal --create-slot %s'" %
                    (self.config.slot_name, self.config.name))
            # Check if the required slot is available
            if postgres_status['replication_slot'].active:
                raise ArchiverFailure(
                    "replication slot '%s' is already in use" %
                    (self.config.slot_name, ))

        # Make sure we are not wasting precious PostgreSQL resources
        self.server.close()

        _logger.info('Activating WAL archiving through streaming protocol')
        try:
            output_handler = PgReceiveXlog.make_output_handler(
                self.config.name + ': ')
            receive = PgReceiveXlog(
                connection=self.server.streaming,
                destination=self.config.streaming_wals_directory,
                command=remote_status['pg_receivexlog_path'],
                version=remote_status['pg_receivexlog_version'],
                app_name=self.config.streaming_archiver_name,
                path=self.server.path,
                slot_name=self.config.slot_name,
                synchronous=remote_status['pg_receivexlog_synchronous'],
                out_handler=output_handler,
                err_handler=output_handler)
            # Finally execute the pg_receivexlog process
            receive.execute()
        except CommandFailedException as e:
            # Retrieve the return code from the exception
            ret_code = e.args[0]['ret']
            if ret_code < 0:
                # If the return code is negative, then pg_receivexlog
                # was terminated by a signal
                msg = "pg_receivexlog terminated by signal: %s" \
                      % abs(ret_code)
            else:
                # Otherwise terminated with an error
                msg = "pg_receivexlog terminated with error code: %s"\
                      % ret_code

            raise ArchiverFailure(msg)
        except KeyboardInterrupt:
            # This is a normal termination, so there is nothing to do besides
            # informing the user.
            output.info('SIGINT received. Terminate gracefully.')
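
A runnable sketch of the POSIX return-code convention handled in the except branch above: a negative status means the child process was killed by a signal, and abs(ret_code) is the signal number (the sample value is illustrative):

import signal

ret_code = -signal.SIGTERM  # -15 on most platforms
if ret_code < 0:
    msg = "pg_receivexlog terminated by signal: %s" % abs(ret_code)
else:
    msg = "pg_receivexlog terminated with error code: %s" % ret_code
print(msg)  # pg_receivexlog terminated by signal: 15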
Example no. 57
    def archive(self, fxlogdb, verbose=True):
        """
        Archive WAL files, discarding duplicates or those that are not valid.

        :param file fxlogdb: File object for xlogdb interactions
        :param boolean verbose: Flag for verbose output
        """
        compressor = self.backup_manager.compression_manager.get_compressor()
        stamp = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        processed = 0
        header = "Processing xlog segments from %s for %s" % (self.name,
                                                              self.config.name)

        # Get the next batch of WAL files to be processed
        batch = self.get_next_batch()

        # Analyse the batch and properly log the information
        if batch.size:
            if batch.size > batch.run_size:
                # Batch mode enabled
                _logger.info(
                    "Found %s xlog segments from %s for %s."
                    " Archive a batch of %s segments in this run.", batch.size,
                    self.name, self.config.name, batch.run_size)
                header += " (batch size: %s)" % batch.run_size
            else:
                # Single run mode (traditional)
                _logger.info(
                    "Found %s xlog segments from %s for %s."
                    " Archive all segments in one run.", batch.size, self.name,
                    self.config.name)
        else:
            _logger.info("No xlog segments found from %s for %s.", self.name,
                         self.config.name)

        # Print the header (verbose mode)
        if verbose:
            output.info(header, log=False)

        # Loop through all available WAL files
        for wal_info in batch:
            # Print the header (non verbose mode)
            if not processed and not verbose:
                output.info(header, log=False)

            # Exit when archive batch size is reached
            if processed >= batch.run_size:
                _logger.debug(
                    "Batch size reached (%s) - "
                    "Exit %s process for %s", batch.batch_size, self.name,
                    self.config.name)
                break

            processed += 1

            # Report to the user the WAL file we are archiving
            output.info("\t%s", wal_info.name, log=False)
            _logger.info("Archiving segment %s of %s from %s: %s/%s",
                         processed, batch.run_size, self.name,
                         self.config.name, wal_info.name)
            # Archive the WAL file
            try:
                self.archive_wal(compressor, wal_info)
            except MatchingDuplicateWalFile:
                # We already have this file. Simply unlink the file.
                os.unlink(wal_info.orig_filename)
                continue
            except DuplicateWalFile:
                output.info(
                    "\tError: %s is already present in server %s. "
                    "File moved to errors directory.", wal_info.name,
                    self.config.name)
                error_dst = os.path.join(
                    self.config.errors_directory,
                    "%s.%s.duplicate" % (wal_info.name, stamp))
                # TODO: cover corner case of duplication (unlikely,
                # but theoretically possible)
                shutil.move(wal_info.orig_filename, error_dst)
                continue
            except AbortedRetryHookScript as e:
                _logger.warning("Archiving of %s/%s aborted by "
                                "pre_archive_retry_script."
                                "Reason: %s" %
                                (self.config.name, wal_info.name, e))
                return
            # Update the WAL archive information with the latest segments
            fxlogdb.write(wal_info.to_xlogdb_line())
            # flush and fsync for every line
            fxlogdb.flush()
            os.fsync(fxlogdb.fileno())

        if processed:
            _logger.debug("Archived %s out of %s xlog segments from %s for %s",
                          processed, batch.size, self.name, self.config.name)
        elif verbose:
            output.info("\tno file found", log=False)

        if batch.errors:
            output.info(
                "Some unknown objects have been found while "
                "processing xlog segments for %s. "
                "Objects moved to errors directory:",
                self.config.name,
                log=False)
            # Log unexpected files
            _logger.warning(
                "Archiver is about to move %s unexpected file(s) "
                "to errors directory for %s from %s", len(batch.errors),
                self.config.name, self.name)
            for error in batch.errors:
                basename = os.path.basename(error)
                output.info("\t%s", basename, log=False)
                # Print informative log line.
                _logger.warning("Moving unexpected file for %s from %s: %s",
                                self.config.name, self.name, basename)
                error_dst = os.path.join(self.config.errors_directory,
                                         "%s.%s.unknown" % (basename, stamp))
                try:
                    shutil.move(error, error_dst)
                except IOError as e:
                    if e.errno == errno.ENOENT:
                        _logger.warning('%s not found' % error)
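
A runnable sketch of the error-file naming used above: the UTC timestamp suffix keeps repeated failures for the same segment from colliding (the directory and segment name are illustrative):

import datetime
import os

stamp = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
errors_directory = '/var/lib/barman/main/errors'
wal_name = '000000010000000000000042'
error_dst = os.path.join(errors_directory,
                         "%s.%s.duplicate" % (wal_name, stamp))
print(error_dst)
# e.g. /var/lib/barman/main/errors/000000010000000000000042.20240115T103000Z.duplicate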
Example no. 58
    def recover(self, backup_info, dest, tablespaces, target_tli, target_time,
                target_xid, target_name, exclusive, remote_command):
        """
        Performs a recovery of a backup

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None target_tli: the target timeline
        :param str|None target_time: the target time
        :param str|None target_xid: the target xid
        :param str|None target_name: the target name created previously with
                            pg_create_restore_point() function call
        :param bool exclusive: whether the recovery is exclusive or not
        :param str|None remote_command: The remote command to recover
                               the base backup, in case of remote backup.
        """

        # Prepare a map that contains all the objects required for a recovery
        recovery_info = self._setup(backup_info, remote_command, dest)
        output.info("Starting %s restore for server %s using backup %s",
                    recovery_info['recovery_dest'], self.server.config.name,
                    backup_info.backup_id)
        output.info("Destination directory: %s", dest)

        # Set targets for PITR
        self._set_pitr_targets(recovery_info, backup_info, dest, target_name,
                               target_time, target_tli, target_xid)

        # Retrieve the safe_horizon for smart copy
        self._retrieve_safe_horizon(recovery_info, backup_info, dest)

        # Check the destination directory. If it doesn't exist, create it
        try:
            recovery_info['cmd'].create_dir_if_not_exists(dest)
        except FsOperationFailed as e:
            output.error(
                "unable to initialise destination directory "
                "'%s': %s", dest, e)
            output.close_and_exit()

        # Initialize tablespace directories
        if backup_info.tablespaces:
            self._prepare_tablespaces(backup_info, recovery_info['cmd'], dest,
                                      tablespaces)
        # Copy the base backup
        output.info("Copying the base backup.")
        try:
            self._backup_copy(backup_info, dest, tablespaces, remote_command,
                              recovery_info['safe_horizon'])
        except DataTransferFailure as e:
            output.error("Failure copying base backup: %s", e)
            output.close_and_exit()

        # Copy the backup.info file in the destination as
        # ".barman-recover.info"
        if remote_command:
            try:
                recovery_info['rsync'](backup_info.filename,
                                       ':%s/.barman-recover.info' % dest)
            except CommandFailedException as e:
                output.error('copy of recovery metadata file failed: %s', e)
                output.close_and_exit()
        else:
            backup_info.save(os.path.join(dest, '.barman-recover.info'))

        # Restore the WAL segments. If GET_WAL option is set, skip this phase
        # as they will be retrieved using the get-wal command.
        if not recovery_info['get_wal']:
            output.info("Copying required WAL segments.")

            try:
                # Retrieve a list of required log files
                required_xlog_files = tuple(
                    self.server.get_required_xlog_files(
                        backup_info, target_tli,
                        recovery_info['target_epoch']))

                # Restore WAL segments into the wal_dest directory
                self._xlog_copy(required_xlog_files, recovery_info['wal_dest'],
                                remote_command)
            except DataTransferFailure as e:
                output.error("Failure copying WAL files: %s", e)
                output.close_and_exit()
            except BadXlogSegmentName as e:
                output.error(
                    "invalid xlog segment name %r\n"
                    "HINT: Please run \"barman rebuild-xlogdb %s\" "
                    "to solve this issue", str(e), self.config.name)
                output.close_and_exit()
            # If WAL files are put directly in the pg_xlog directory,
            # avoid shipping of just recovered files
            # by creating the corresponding archive status file
            if not recovery_info['is_pitr']:
                output.info("Generating archive status files")
                self._generate_archive_status(recovery_info, remote_command,
                                              required_xlog_files)

        # Generate recovery.conf file (only if needed by PITR)
        if recovery_info['is_pitr']:
            output.info("Generating recovery.conf")
            self._generate_recovery_conf(recovery_info, backup_info, dest,
                                         exclusive, remote_command,
                                         target_name, target_time, target_tli,
                                         target_xid)

        # Create archive_status directory if necessary
        archive_status_dir = os.path.join(recovery_info['wal_dest'],
                                          'archive_status')
        try:
            recovery_info['cmd'].create_dir_if_not_exists(archive_status_dir)
        except FsOperationFailed as e:
            output.error(
                "unable to create the archive_status directory "
                "'%s': %s", archive_status_dir, e)
            output.close_and_exit()

        # As last step, analyse configuration files in order to spot
        # harmful options. Barman performs automatic conversion of
        # some options as well as notifying users of their existence.
        #
        # This operation is performed in three steps:
        # 1) mapping
        # 2) analysis
        # 3) copy
        output.info("Identify dangerous settings in destination directory.")

        self._map_temporary_config_files(recovery_info, backup_info,
                                         remote_command)
        self._analyse_temporary_config_files(recovery_info)
        self._copy_temporary_config_files(dest, remote_command, recovery_info)

        # Cleanup operations
        self._teardown(recovery_info)

        return recovery_info
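
One behavioural difference from the earlier variant (Example no. 55) is where archive_status is created: there it was hard-coded under dest/pg_xlog, while here it derives from the computed wal_dest, which points at barman_xlog during a PITR recovery. A runnable sketch of the two paths (directories are illustrative):

import os

dest = '/var/lib/pgsql/restore'
wal_dest = os.path.join(dest, 'barman_xlog')  # wal_dest in the PITR case

print(os.path.join(dest, 'pg_xlog', 'archive_status'))  # earlier variant
print(os.path.join(wal_dest, 'archive_status'))         # this variant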
Example no. 59
def handler(line):
    if line:
        if prefix:
            output.info("%s%s", prefix, line)
        else:
            output.info("%s", line)
Example no. 60
    def rebuild_xlogdb(self):
        """
        Rebuild the whole xlog database guessing it from the archive content.
        """
        from os.path import isdir, join

        output.info("Rebuilding xlogdb for server %s", self.config.name)
        root = self.config.wals_directory
        comp_manager = self.compression_manager
        wal_count = label_count = history_count = 0
        # lock the xlogdb as we are about to replace it completely
        with self.server.xlogdb('w') as fxlogdb:
            xlogdb_new = fxlogdb.name + ".new"
            with open(xlogdb_new, 'w') as fxlogdb_new:
                for name in sorted(os.listdir(root)):
                    # ignore the xlogdb and its lockfile
                    if name.startswith(self.server.XLOG_DB):
                        continue
                    fullname = join(root, name)
                    if isdir(fullname):
                        # all relevant files are in subdirectories
                        hash_dir = fullname
                        for wal_name in sorted(os.listdir(hash_dir)):
                            fullname = join(hash_dir, wal_name)
                            if isdir(fullname):
                                _logger.warning(
                                    'unexpected directory '
                                    'rebuilding the wal database: %s',
                                    fullname)
                            else:
                                if xlog.is_wal_file(fullname):
                                    wal_count += 1
                                elif xlog.is_backup_file(fullname):
                                    label_count += 1
                                elif fullname.endswith('.tmp'):
                                    _logger.warning(
                                        'temporary file found '
                                        'rebuilding the wal database: %s',
                                        fullname)
                                    continue
                                else:
                                    _logger.warning(
                                        'unexpected file '
                                        'rebuilding the wal database: %s',
                                        fullname)
                                    continue
                                wal_info = comp_manager.get_wal_file_info(
                                    fullname)
                                fxlogdb_new.write(wal_info.to_xlogdb_line())
                    else:
                        # only history files are here
                        if xlog.is_history_file(fullname):
                            history_count += 1
                            wal_info = comp_manager.get_wal_file_info(fullname)
                            fxlogdb_new.write(wal_info.to_xlogdb_line())
                        else:
                            _logger.warning(
                                'unexpected file '
                                'rebuilding the wal database: %s', fullname)
                os.fsync(fxlogdb_new.fileno())
            shutil.move(xlogdb_new, fxlogdb.name)
            fsync_dir(os.path.dirname(fxlogdb.name))
        output.info(
            'Done rebuilding xlogdb for server %s '
            '(history: %s, backup_labels: %s, wal_file: %s)', self.config.name,
            history_count, label_count, wal_count)
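
A runnable sketch of the atomic-rewrite pattern used above: write a sibling ".new" file, fsync it, rename it over the original, then fsync the containing directory so the rename itself is durable (atomic_rewrite is a hypothetical helper; the final fsync mirrors what fsync_dir() does):

import os
import shutil

def atomic_rewrite(path, lines):
    tmp = path + '.new'
    with open(tmp, 'w') as f:
        f.writelines(lines)
        f.flush()
        os.fsync(f.fileno())
    # Rename over the original, then make the rename durable
    shutil.move(tmp, path)
    dir_fd = os.open(os.path.dirname(path) or '.', os.O_RDONLY)
    try:
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)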