def xlog_copy(self, required_xlog_files, wal_dest, remote_command):
        """
        Restore WAL segments

        The required WAL files are grouped by archive hash directory and
        copied one group at a time. When none of them is compressed, rsync
        copies each group straight from the archive to the destination.
        When at least one is compressed, every file of the group is first
        decompressed (or plainly copied, if not compressed) into a staging
        directory: the destination itself for a local recovery, or a
        temporary spool directory for a remote one, whose content is then
        shipped with rsync. The temporary spool directory is always removed
        before returning, even if the copy fails.

        :param required_xlog_files: list of all required WAL files
        :param wal_dest: the destination directory for xlog recover
        :param remote_command: the remote command to recover the xlog, in
               case of remote recovery. A false value means local recovery.
        :raise DataTransferFailure: if rsync fails while copying WAL files
        """
        # add '/' suffix to ensure it is a directory
        wal_dest = '%s/' % wal_dest
        # Keep the plain destination path for error messages: wal_dest
        # gets a ':' prefix below only in case of remote recovery
        wal_dest_path = wal_dest
        # List of required WAL files partitioned by containing directory
        xlogs = collections.defaultdict(list)
        # Map of every compressor used with any WAL file in the archive,
        # to be used during this recovery
        compressors = {}
        compression_manager = self.backup_manager.compression_manager
        # Fill xlogs and compressors maps from required_xlog_files
        for wal_info in required_xlog_files:
            xlogs[xlog.hash_dir(wal_info.name)].append(wal_info)
            # If a compressor is required, make sure it exists in the cache
            if wal_info.compression is not None and \
                    wal_info.compression not in compressors:
                compressors[wal_info.compression] = \
                    compression_manager.get_compressor(
                        compression=wal_info.compression)

        rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        # If compression is used and this is a remote recovery, we need a
        # temporary directory where to spool uncompressed files,
        # otherwise we either decompress every WAL file in the local
        # destination, or we ship the uncompressed file remotely
        spool_dir = None
        if compressors:
            if remote_command:
                # Decompress to a temporary spool directory
                spool_dir = tempfile.mkdtemp(prefix='barman_xlog-')
                wal_decompression_dest = spool_dir
            else:
                # Decompress directly to the destination directory
                wal_decompression_dest = wal_dest
            # Make sure wal_decompression_dest exists
            mkpath(wal_decompression_dest)
        else:
            # If no compression
            wal_decompression_dest = None
        if remote_command:
            # If remote recovery tell rsync to copy them remotely
            # add ':' prefix to mark it as remote
            wal_dest = ':%s' % wal_dest
        try:
            total_wals = sum(map(len, xlogs.values()))
            partial_count = 0
            for prefix in sorted(xlogs):
                batch = xlogs[prefix]
                batch_names = [segment.name for segment in batch]
                batch_len = len(batch)
                partial_count += batch_len
                source_dir = os.path.join(self.config.wals_directory,
                                          prefix)
                _logger.info(
                    "Starting copy of %s WAL files %s/%s from %s to %s",
                    batch_len,
                    partial_count,
                    total_wals,
                    batch[0],
                    batch[-1])

                if not compressors:
                    # No compressed file at all: let rsync copy the batch
                    # straight from the archive directory
                    try:
                        rsync.from_file_list(
                            batch_names,
                            "%s/" % source_dir,
                            wal_dest)
                    except CommandFailedException as e:
                        msg = "data transfer failure while copying WAL " \
                              "files to directory '%s'" % (wal_dest_path,)
                        raise DataTransferFailure.from_rsync_error(e, msg)
                    continue

                # At least one compressed file has been found: check every
                # WAL file of the batch and stage it uncompressed
                for segment in batch:
                    src_file = os.path.join(source_dir, segment.name)
                    dst_file = os.path.join(wal_decompression_dest,
                                            segment.name)
                    if segment.compression is not None:
                        compressors[segment.compression].decompress(
                            src_file, dst_file)
                    else:
                        shutil.copy2(src_file, dst_file)

                if not remote_command:
                    # Local recovery: files are already in their final place
                    continue

                try:
                    # Transfer the WAL files
                    rsync.from_file_list(
                        batch_names, wal_decompression_dest, wal_dest)
                except CommandFailedException as e:
                    msg = ("data transfer failure while copying WAL files "
                           "to directory '%s'") % (wal_dest_path,)
                    raise DataTransferFailure.from_rsync_error(e, msg)

                # Cleanup files after the transfer
                for name in batch_names:
                    file_name = os.path.join(wal_decompression_dest, name)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)

            _logger.info("Finished copying %s WAL files.", total_wals)
        finally:
            # Remove the temporary spool directory (it exists only when
            # compression is in use during a remote recovery), no matter
            # how the copy ended, so that a failure does not leak it
            if spool_dir is not None:
                shutil.rmtree(spool_dir)
    def basebackup_copy(self, backup_info, dest, tablespaces=None,
                        remote_command=None, safe_horizon=None):
        """
        Perform the actual copy of the base backup for recovery purposes

        Tablespaces are copied first, one at a time, each with its own
        bandwidth limit when configured. The data directory is copied last,
        excluding (and protecting from deletion) every tablespace path that
        has already been handled.

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: default None. The remote command to
            recover the base backup, in case of remote backup.
        :param datetime.datetime|None safe_horizon: anything after this time
            has to be checked with checksum
        :raise DataTransferFailure: if an rsync transfer fails
        """

        # List of paths to be excluded from rsync
        exclude_and_protect = []

        # Set a ':' prefix to remote destinations
        dest_prefix = ':' if remote_command else ''

        # Destination without trailing slashes: containment is checked on a
        # path component boundary, so that '/data/pg2/ts' is not mistaken
        # for a directory inside '/data/pg', and the resulting exclude
        # pattern always starts with '/'
        dest_root = dest.rstrip('/')

        # Copy tablespaces applying bwlimit when necessary
        if backup_info.tablespaces:
            tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
            # Copy a tablespace at a time
            for tablespace in backup_info.tablespaces:
                # Apply bandwidth limit if requested
                bwlimit = self.config.bandwidth_limit
                if tablespaces_bw_limit and \
                        tablespace.name in tablespaces_bw_limit:
                    bwlimit = tablespaces_bw_limit[tablespace.name]
                # By default a tablespace goes in the same location where
                # it was on the source server when the backup was taken
                location = tablespace.location
                # If a relocation has been requested for this tablespace
                # use the user provided target directory
                if tablespaces and tablespace.name in tablespaces:
                    location = tablespaces[tablespace.name]
                # If the tablespace location is inside the data directory,
                # exclude and protect it from being deleted during
                # the data directory copy
                if location == dest_root or \
                        location.startswith(dest_root + '/'):
                    exclude_and_protect.append(location[len(dest_root):])
                # Exclude and protect the tablespace from being deleted during
                # the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
                # Copy the tablespace using smart copy
                tb_rsync = RsyncPgData(
                    path=self.server.path,
                    ssh=remote_command,
                    bwlimit=bwlimit,
                    network_compression=self.config.network_compression,
                    check=True)
                try:
                    tb_rsync.smart_copy(
                        '%s/' % backup_info.get_data_directory(tablespace.oid),
                        dest_prefix + location,
                        safe_horizon)
                except CommandFailedException as e:
                    msg = "data transfer failure on directory '%s'" % location
                    raise DataTransferFailure.from_rsync_error(e, msg)

        # Copy the pgdata directory
        rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            exclude_and_protect=exclude_and_protect,
            network_compression=self.config.network_compression)
        try:
            rsync.smart_copy(
                '%s/' % backup_info.get_data_directory(),
                dest_prefix + dest,
                safe_horizon)
        except CommandFailedException as e:
            msg = "data transfer failure on directory '%s'" % dest
            raise DataTransferFailure.from_rsync_error(e, msg)
    def backup_copy(self, backup_info):
        """
        Perform the actual copy of the backup using Rsync.
        First, it copies one tablespace at a time, then the PGDATA directory,
        and finally configuration files (if outside PGDATA).
        Bandwidth limitation, according to configuration, is applied in
        the process.
        This method is the core of base backup copy using Rsync+Ssh.

        :param barman.infofile.BackupInfo backup_info: backup information
        :raise DataTransferFailure: if an rsync transfer fails (a failure
            with return code 23 on the ident file is only reported as a
            warning)
        """

        # List of paths to be ignored by Rsync
        exclude_and_protect = []

        # PGDATA without trailing slashes, used to decide whether a path
        # lies inside the data directory
        pgdata_root = backup_info.pgdata.rstrip('/')

        def inside_pgdata(path):
            # Compare on a path component boundary: a plain string prefix
            # test would wrongly place '/srv/pgdata_conf/x' inside
            # '/srv/pgdata'
            return path == pgdata_root or \
                path.startswith(pgdata_root + '/')

        # Retrieve the previous backup metadata, then set safe_horizon
        previous_backup = self.backup_manager.get_previous_backup(
            backup_info.backup_id)
        if previous_backup:
            # safe_horizon is a tz-aware timestamp because BackupInfo class
            # ensures it
            safe_horizon = previous_backup.begin_time
        else:
            # If no previous backup is present, safe_horizon is set to None
            safe_horizon = None

        # Copy tablespaces applying bwlimit when necessary
        if backup_info.tablespaces:
            tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
            # Copy a tablespace at a time
            for tablespace in backup_info.tablespaces:
                self.current_action = "copying tablespace '%s'" % \
                                      tablespace.name
                # Apply bandwidth limit if requested
                bwlimit = self.config.bandwidth_limit
                if tablespaces_bw_limit and \
                        tablespace.name in tablespaces_bw_limit:
                    bwlimit = tablespaces_bw_limit[tablespace.name]
                if bwlimit:
                    self.current_action += (" with bwlimit '%d'" % bwlimit)
                _logger.debug(self.current_action)
                # If the tablespace location is inside the data directory,
                # exclude and protect it from being copied twice during
                # the data directory copy
                if inside_pgdata(tablespace.location):
                    exclude_and_protect.append(
                        tablespace.location[len(pgdata_root):])
                # Make sure the destination directory exists in order for
                # smart copy to detect that no file is present there
                tablespace_dest = backup_info.get_data_directory(
                    tablespace.oid)
                mkpath(tablespace_dest)
                # Exclude and protect the tablespace from being copied again
                # during the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
                # Copy the backup using smart_copy trying to reuse the
                # tablespace of the previous backup if incremental is active
                ref_dir = self._reuse_dir(previous_backup, tablespace.oid)
                tb_rsync = RsyncPgData(
                    path=self.server.path,
                    ssh=self.ssh_command,
                    ssh_options=self.ssh_options,
                    args=self._reuse_args(ref_dir),
                    bwlimit=bwlimit,
                    network_compression=self.config.network_compression,
                    check=True)
                try:
                    tb_rsync.smart_copy(
                        ':%s/' % tablespace.location,
                        tablespace_dest,
                        safe_horizon,
                        ref_dir)
                except CommandFailedException as e:
                    msg = "data transfer failure on directory '%s'" % \
                          backup_info.get_data_directory(tablespace.oid)
                    raise DataTransferFailure.from_rsync_error(e, msg)

        # Make sure the destination directory exists in order for smart copy
        # to detect that no file is present there
        backup_dest = backup_info.get_data_directory()
        mkpath(backup_dest)

        # Copy the PGDATA, trying to reuse the data dir
        # of the previous backup if incremental is active
        ref_dir = self._reuse_dir(previous_backup)
        rsync = RsyncPgData(
            path=self.server.path,
            ssh=self.ssh_command,
            ssh_options=self.ssh_options,
            args=self._reuse_args(ref_dir),
            bwlimit=self.config.bandwidth_limit,
            exclude_and_protect=exclude_and_protect,
            network_compression=self.config.network_compression)
        try:
            rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest,
                             safe_horizon,
                             ref_dir)
        except CommandFailedException as e:
            msg = "data transfer failure on directory '%s'" % \
                  backup_info.pgdata
            raise DataTransferFailure.from_rsync_error(e, msg)

        # At last copy pg_control
        try:
            rsync(':%s/global/pg_control' % (backup_info.pgdata,),
                  '%s/global/pg_control' % (backup_dest,))
        except CommandFailedException as e:
            msg = "data transfer failure on file '%s/global/pg_control'" % \
                  backup_info.pgdata
            raise DataTransferFailure.from_rsync_error(e, msg)

        # Copy configuration files (if not inside PGDATA)
        self.current_action = "copying configuration files"
        _logger.debug(self.current_action)
        for key in ('config_file', 'hba_file', 'ident_file'):
            cf = getattr(backup_info, key, None)
            if cf:
                assert isinstance(cf, str)
                # Consider only those that reside outside of the original
                # PGDATA directory
                if inside_pgdata(cf):
                    self.current_action = \
                        "skipping %s as contained in %s directory" % (
                            key, backup_info.pgdata)
                    _logger.debug(self.current_action)
                    continue
                self.current_action = "copying %s as outside %s directory" % (
                    key, backup_info.pgdata)
                _logger.info(self.current_action)
                try:
                    rsync(':%s' % cf, backup_dest)
                except CommandFailedException as e:
                    ret_code = e.args[0]['ret']
                    msg = "data transfer failure on file '%s'" % cf
                    if 'ident_file' == key and ret_code == 23:
                        # If the ident file is missing,
                        # it isn't an error condition for PostgreSQL.
                        # Barman is consistent with this behavior.
                        output.warning(msg, log=True)
                        continue
                    else:
                        raise DataTransferFailure.from_rsync_error(e, msg)
        # Check for any include directives in PostgreSQL configuration
        # Currently, include directives are not supported for files that
        # reside outside PGDATA. These files must be manually backed up.
        # Barman will emit a warning and list those files
        if backup_info.included_files:
            filtered_files = [
                included_file
                for included_file in backup_info.included_files
                if not inside_pgdata(included_file)
            ]
            if filtered_files:
                output.warning(
                    "The usage of include directives is not supported "
                    "for files that reside outside PGDATA.\n"
                    "Please manually backup the following files:\n"
                    "\t%s\n",
                    "\n\t".join(filtered_files)
                )
    def backup_copy(self, backup_info):
        """
        Perform the actual copy of the backup using Rsync.
        First, it copies one tablespace at a time, then the PGDATA directory,
        and finally configuration files (if outside PGDATA).
        Bandwidth limitation, according to configuration, is applied in
        the process.
        This method is the core of base backup copy using Rsync+Ssh.

        :param barman.infofile.BackupInfo backup_info: backup information
        """

        # List of paths to be ignored by Rsync
        exclude_and_protect = []

        # Retrieve the previous backup metadata, then set safe_horizon
        previous_backup = self.backup_manager.get_previous_backup(
            backup_info.backup_id)
        if previous_backup:
            # safe_horizon is a tz-aware timestamp because BackupInfo class
            # ensures it
            safe_horizon = previous_backup.begin_time
        else:
            # If no previous backup is present, safe_horizon is set to None
            safe_horizon = None

        # Copy tablespaces applying bwlimit when necessary
        if backup_info.tablespaces:
            tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
            # Copy a tablespace at a time
            for tablespace in backup_info.tablespaces:
                self.current_action = "copying tablespace '%s'" % \
                                      tablespace.name
                # Apply bandwidth limit if requested
                bwlimit = self.config.bandwidth_limit
                if tablespaces_bw_limit and \
                        tablespace.name in tablespaces_bw_limit:
                    bwlimit = tablespaces_bw_limit[tablespace.name]
                if bwlimit:
                    self.current_action += (" with bwlimit '%d'" % bwlimit)
                _logger.debug(self.current_action)
                # If the tablespace location is inside the data directory,
                # exclude and protect it from being copied twice during
                # the data directory copy
                if tablespace.location.startswith(backup_info.pgdata):
                    exclude_and_protect.append(
                        tablespace.location[len(backup_info.pgdata):])
                # Make sure the destination directory exists in order for
                # smart copy to detect that no file is present there
                tablespace_dest = backup_info.get_data_directory(tablespace.oid)
                mkpath(tablespace_dest)
                # Exclude and protect the tablespace from being copied again
                # during the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
                # Copy the backup using smart_copy trying to reuse the
                # tablespace of the previous backup if incremental is active
                ref_dir = self._reuse_dir(previous_backup, tablespace.oid)
                tb_rsync = RsyncPgData(
                    path=self.server.path,
                    ssh=self.ssh_command,
                    ssh_options=self.ssh_options,
                    args=self._reuse_args(ref_dir),
                    bwlimit=bwlimit,
                    network_compression=self.config.network_compression,
                    check=True)
                try:
                    tb_rsync.smart_copy(
                        ':%s/' % tablespace.location,
                        tablespace_dest,
                        safe_horizon,
                        ref_dir)
                # NOTE(review): "except X, e" is Python 2-only syntax and is
                # a SyntaxError on Python 3; it is used for every
                # CommandFailedException handler in this method.
                except CommandFailedException, e:
                    msg = "data transfer failure on directory '%s'" % \
                          backup_info.get_data_directory(tablespace.oid)
                    raise DataTransferFailure.from_rsync_error(e, msg)
            # NOTE(review): this copy of the method looks damaged here.
            # `rsync` and `backup_dest` are never assigned in this block and
            # the `except` that follows has no visible matching `try:`.
            # Presumably the lines that create the destination directory,
            # build the PGDATA rsync object and open the `try:` were lost;
            # confirm against the original file before relying on this code.
            rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest,
                             safe_horizon,
                             ref_dir)
        except CommandFailedException, e:
            msg = "data transfer failure on directory '%s'" % \
                  backup_info.pgdata
            raise DataTransferFailure.from_rsync_error(e, msg)

        # At last copy pg_control
        try:
            rsync(':%s/global/pg_control' % (backup_info.pgdata,),
                  '%s/global/pg_control' % (backup_dest,))
        except CommandFailedException, e:
            msg = "data transfer failure on file '%s/global/pg_control'" % \
                  backup_info.pgdata
            raise DataTransferFailure.from_rsync_error(e, msg)

        # Copy configuration files (if not inside PGDATA)
        self.current_action = "copying configuration files"
        _logger.debug(self.current_action)
        for key in ('config_file', 'hba_file', 'ident_file'):
            cf = getattr(backup_info, key, None)
            if cf:
                assert isinstance(cf, str)
                # Consider only those that reside outside of the original PGDATA
                if cf.startswith(backup_info.pgdata):
                    self.current_action = \
                        "skipping %s as contained in %s directory" % (
                            key, backup_info.pgdata)
                    _logger.debug(self.current_action)
                    continue
                # NOTE(review): nothing follows the `continue` in the lines
                # visible here, so files outside PGDATA are not actually
                # copied by this block; the rest of the loop body may be
                # missing - confirm against the original file.
Exemple #6
0
    def xlog_copy(self, required_xlog_files, wal_dest, remote_command):
        """
        Restore WAL segments

        The required WAL file names are grouped by archive hash directory
        and copied one group at a time. If a compressor is configured, files
        are decompressed directly into the destination for a local recovery,
        or into a temporary spool directory that is then shipped with rsync
        for a remote one. Without a compressor rsync copies the files
        straight from the archive.

        :param required_xlog_files: list of all required WAL files
        :param wal_dest: the destination directory for xlog recover
        :param remote_command: the remote command to recover the xlog, in
               case of remote recovery. A false value means local recovery.
        :raise DataTransferFailure: if rsync fails while copying WAL files
        """
        # Retrieve the list of required WAL segments
        # according to recovery options
        xlogs = {}
        for wal_info in required_xlog_files:
            hashdir = xlog.hash_dir(wal_info.name)
            xlogs.setdefault(hashdir, []).append(wal_info.name)
        # Check decompression options
        compressor = self.backup_manager.compression_manager.get_compressor()

        rsync = RsyncPgData(
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        if remote_command:
            # If remote recovery tell rsync to copy them remotely
            # add '/' suffix to ensure it is a directory
            wal_dest_path = '%s/' % wal_dest
            # add ':' prefix to mark it as remote
            wal_dest = ':%s' % wal_dest_path
        else:
            # we will not use rsync: destdir must exists
            mkpath(wal_dest)
            # Local destinations carry no ':' prefix, so error messages
            # must report the path as it is
            wal_dest_path = wal_dest
        if compressor and remote_command:
            # NOTE(review): the spool directory is not removed anywhere in
            # the lines visible here; confirm it is cleaned up afterwards.
            xlog_spool = tempfile.mkdtemp(prefix='barman_xlog-')
        total_wals = sum(map(len, xlogs.values()))
        partial_count = 0
        for prefix in sorted(xlogs):
            batch_len = len(xlogs[prefix])
            partial_count += batch_len
            source_dir = os.path.join(self.config.wals_directory, prefix)
            _logger.info(
                "Starting copy of %s WAL files %s/%s from %s to %s",
                batch_len,
                partial_count,
                total_wals,
                xlogs[prefix][0],
                xlogs[prefix][-1])
            if compressor:
                if remote_command:
                    # Decompress into the spool, then ship with rsync
                    for segment in xlogs[prefix]:
                        compressor.decompress(os.path.join(source_dir, segment),
                                              os.path.join(xlog_spool, segment))
                    try:
                        rsync.from_file_list(xlogs[prefix],
                                             xlog_spool, wal_dest)
                    except CommandFailedException as e:
                        msg = "data transfer failure while copying WAL files " \
                              "to directory '%s'" % (wal_dest_path,)
                        raise DataTransferFailure.from_rsync_error(e, msg)

                    # Cleanup files after the transfer
                    for segment in xlogs[prefix]:
                        file_name = os.path.join(xlog_spool, segment)
                        try:
                            os.unlink(file_name)
                        except OSError as e:
                            output.warning(
                                "Error removing temporary file '%s': %s",
                                file_name, e)
                else:
                    # decompress directly to the right place
                    for segment in xlogs[prefix]:
                        compressor.decompress(os.path.join(source_dir, segment),
                                              os.path.join(wal_dest, segment))
            else:
                try:
                    rsync.from_file_list(
                        xlogs[prefix],
                        "%s/" % os.path.join(
                            self.config.wals_directory, prefix),
                        wal_dest)
                except CommandFailedException as e:
                    msg = "data transfer failure while copying WAL files " \
                          "to directory '%s'" % (wal_dest_path,)
                    raise DataTransferFailure.from_rsync_error(e, msg)