Exemple #1
0
    def _xlog_copy(self, required_xlog_files, wal_dest, remote_command):
        """
        Restore WAL segments

        :param required_xlog_files: list of all required WAL files
        :param wal_dest: the destination directory for xlog recover
        :param remote_command: default None. The remote command to recover
               the xlog, in case of remote backup.
        """
        # List of required WAL files partitioned by containing directory
        xlogs = collections.defaultdict(list)
        # add '/' suffix to ensure it is a directory
        wal_dest = '%s/' % wal_dest
        # Map of every compressor used with any WAL file in the archive,
        # to be used during this recovery
        compressors = {}
        compression_manager = self.backup_manager.compression_manager
        # Fill xlogs and compressors maps from required_xlog_files
        for wal_info in required_xlog_files:
            hashdir = xlog.hash_dir(wal_info.name)
            xlogs[hashdir].append(wal_info)
            # If a compressor is required, make sure it exists in the cache
            if wal_info.compression is not None and \
                    wal_info.compression not in compressors:
                compressors[wal_info.compression] = \
                    compression_manager.get_compressor(
                        compression=wal_info.compression)

        rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        # If compression is used and this is a remote recovery, we need a
        # temporary directory where to spool uncompressed files,
        # otherwise we either decompress every WAL file in the local
        # destination, or we ship the uncompressed file remotely
        if compressors:
            if remote_command:
                # Decompress to a temporary spool directory
                wal_decompression_dest = tempfile.mkdtemp(
                    prefix='barman_xlog-')
            else:
                # Decompress directly to the destination directory
                wal_decompression_dest = wal_dest
            # Make sure wal_decompression_dest exists
            mkpath(wal_decompression_dest)
        else:
            # If no compression
            wal_decompression_dest = None
        if remote_command:
            # If remote recovery tell rsync to copy them remotely
            # add ':' prefix to mark it as remote
            wal_dest = ':%s' % wal_dest
        total_wals = sum(map(len, xlogs.values()))
        partial_count = 0
        for prefix in sorted(xlogs):
            batch_len = len(xlogs[prefix])
            partial_count += batch_len
            source_dir = os.path.join(self.config.wals_directory, prefix)
            _logger.info("Starting copy of %s WAL files %s/%s from %s to %s",
                         batch_len, partial_count, total_wals,
                         xlogs[prefix][0], xlogs[prefix][-1])
            # If at least one compressed file has been found, activate
            # compression check and decompression for each WAL files
            if compressors:
                for segment in xlogs[prefix]:
                    dst_file = os.path.join(wal_decompression_dest,
                                            segment.name)
                    if segment.compression is not None:
                        compressors[segment.compression].decompress(
                            os.path.join(source_dir, segment.name), dst_file)
                    else:
                        shutil.copy2(os.path.join(source_dir, segment.name),
                                     dst_file)
                if remote_command:
                    try:
                        # Transfer the WAL files
                        rsync.from_file_list(
                            list(segment.name for segment in xlogs[prefix]),
                            wal_decompression_dest, wal_dest)
                    except CommandFailedException as e:
                        msg = ("data transfer failure while copying WAL files "
                               "to directory '%s'") % (wal_dest[1:], )
                        raise DataTransferFailure.from_command_error(
                            'rsync', e, msg)

                    # Cleanup files after the transfer
                    for segment in xlogs[prefix]:
                        file_name = os.path.join(wal_decompression_dest,
                                                 segment.name)
                        try:
                            os.unlink(file_name)
                        except OSError as e:
                            output.warning(
                                "Error removing temporary file '%s': %s",
                                file_name, e)
            else:
                try:
                    rsync.from_file_list(
                        list(segment.name
                             for segment in xlogs[prefix]), "%s/" %
                        os.path.join(self.config.wals_directory, prefix),
                        wal_dest)
                except CommandFailedException as e:
                    msg = "data transfer failure while copying WAL files " \
                          "to directory '%s'" % (wal_dest[1:],)
                    raise DataTransferFailure.from_command_error(
                        'rsync', e, msg)

        _logger.info("Finished copying %s WAL files.", total_wals)

        # Remove local decompression target directory if different from the
        # destination directory (it happens when compression is in use during a
        # remote recovery
        if wal_decompression_dest and wal_decompression_dest != wal_dest:
            shutil.rmtree(wal_decompression_dest)
Exemple #2
0
    def _backup_copy(self,
                     backup_info,
                     dest,
                     tablespaces=None,
                     remote_command=None,
                     safe_horizon=None):
        """
        Perform the actual copy of the base backup for recovery purposes

        First, it copies one tablespace at a time, then the PGDATA directory.

        Bandwidth limitation, according to configuration, is applied in
        the process.

        TODO: manage configuration files if outside PGDATA.

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: default None. The remote command to
            recover the base backup, in case of remote backup.
        :param datetime.datetime|None safe_horizon: anything after this time
            has to be checked with checksum
        """

        # Set a ':' prefix to remote destinations
        dest_prefix = ''
        if remote_command:
            dest_prefix = ':'

        # Create the copy controller object, specific for rsync,
        # which will drive all the copy operations. Items to be
        # copied are added before executing the copy() method
        controller = RsyncCopyController(
            path=self.server.path,
            ssh_command=remote_command,
            network_compression=self.config.network_compression,
            safe_horizon=safe_horizon,
            retry_times=self.config.basebackup_retry_times,
            retry_sleep=self.config.basebackup_retry_sleep,
            workers=self.config.parallel_jobs,
        )

        # Dictionary for paths to be excluded from rsync
        exclude_and_protect = []

        # Process every tablespace
        if backup_info.tablespaces:
            for tablespace in backup_info.tablespaces:
                # By default a tablespace goes in the same location where
                # it was on the source server when the backup was taken
                location = tablespace.location
                # If a relocation has been requested for this tablespace
                # use the user provided target directory
                if tablespaces and tablespace.name in tablespaces:
                    location = tablespaces[tablespace.name]

                # If the tablespace location is inside the data directory,
                # exclude and protect it from being deleted during
                # the data directory copy
                if location.startswith(dest):
                    exclude_and_protect += [location[len(dest):]]

                # Exclude and protect the tablespace from being deleted during
                # the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)

                # Add the tablespace directory to the list of objects
                # to be copied by the controller
                controller.add_directory(
                    label=tablespace.name,
                    src='%s/' % backup_info.get_data_directory(tablespace.oid),
                    dst=dest_prefix + location,
                    bwlimit=self.config.get_bwlimit(tablespace),
                    item_class=controller.TABLESPACE_CLASS)

        # Add the PGDATA directory to the list of objects to be copied
        # by the controller
        controller.add_directory(label='pgdata',
                                 src='%s/' % backup_info.get_data_directory(),
                                 dst=dest_prefix + dest,
                                 bwlimit=self.config.get_bwlimit(),
                                 exclude=[
                                     '/pg_log/*',
                                     '/pg_xlog/*',
                                     '/pg_wal/*',
                                     '/postmaster.pid',
                                     '/recovery.conf',
                                     '/tablespace_map',
                                 ],
                                 exclude_and_protect=exclude_and_protect,
                                 item_class=controller.PGDATA_CLASS)

        # TODO: Manage different location for configuration files
        # TODO: that were not within the data directory

        # Execute the copy
        try:
            controller.copy()
        # TODO: Improve the exception output
        except CommandFailedException as e:
            msg = "data transfer failure"
            raise DataTransferFailure.from_command_error('rsync', e, msg)
    def _backup_copy(self, backup_info, dest, tablespaces=None,
                     remote_command=None, safe_horizon=None):
        """
        Perform the actual copy of the base backup for recovery purposes

        First, it copies one tablespace at a time, then the PGDATA directory.

        Bandwidth limitation, according to configuration, is applied in
        the process.

        TODO: manage configuration files if outside PGDATA.

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: default None. The remote command to
            recover the base backup, in case of remote backup.
        :param datetime.datetime|None safe_horizon: anything after this time
            has to be checked with checksum
        """

        # Set a ':' prefix to remote destinations
        dest_prefix = ''
        if remote_command:
            dest_prefix = ':'

        # Create the copy controller object, specific for rsync,
        # which will drive all the copy operations. Items to be
        # copied are added before executing the copy() method
        controller = RsyncCopyController(
            path=self.server.path,
            ssh_command=remote_command,
            network_compression=self.config.network_compression,
            safe_horizon=safe_horizon,
            retry_times=self.config.basebackup_retry_times,
            retry_sleep=self.config.basebackup_retry_sleep,
        )

        # Dictionary for paths to be excluded from rsync
        exclude_and_protect = []

        # Process every tablespace
        if backup_info.tablespaces:
            for tablespace in backup_info.tablespaces:
                # By default a tablespace goes in the same location where
                # it was on the source server when the backup was taken
                location = tablespace.location
                # If a relocation has been requested for this tablespace
                # use the user provided target directory
                if tablespaces and tablespace.name in tablespaces:
                    location = tablespaces[tablespace.name]

                # If the tablespace location is inside the data directory,
                # exclude and protect it from being deleted during
                # the data directory copy
                if location.startswith(dest):
                    exclude_and_protect += [location[len(dest):]]

                # Exclude and protect the tablespace from being deleted during
                # the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)

                # Add the tablespace directory to the list of objects
                # to be copied by the controller
                controller.add_directory(
                    label=tablespace.name,
                    src='%s/' % backup_info.get_data_directory(tablespace.oid),
                    dst=dest_prefix + location,
                    bwlimit=self.config.get_bwlimit(tablespace),
                    item_class=controller.TABLESPACE_CLASS
                    )

        # Add the PGDATA directory to the list of objects to be copied
        # by the controller
        controller.add_directory(
            label='pgdata',
            src='%s/' % backup_info.get_data_directory(),
            dst=dest_prefix + dest,
            bwlimit=self.config.get_bwlimit(),
            exclude=[
                '/pg_xlog/*',
                '/pg_log/*',
                '/recovery.conf',
                '/postmaster.pid'],
            exclude_and_protect=exclude_and_protect,
            item_class=controller.PGDATA_CLASS
        )

        # TODO: Manage different location for configuration files
        # TODO: that were not within the data directory

        # Execute the copy
        try:
            controller.copy()
        # TODO: Improve the exception output
        except CommandFailedException as e:
            msg = "data transfer failure"
            raise DataTransferFailure.from_command_error(
                'rsync', e, msg)
    def _xlog_copy(self, required_xlog_files, wal_dest, remote_command):
        """
        Restore WAL segments

        :param required_xlog_files: list of all required WAL files
        :param wal_dest: the destination directory for xlog recover
        :param remote_command: default None. The remote command to recover
               the xlog, in case of remote backup.
        """
        # List of required WAL files partitioned by containing directory
        xlogs = collections.defaultdict(list)
        # add '/' suffix to ensure it is a directory
        wal_dest = '%s/' % wal_dest
        # Map of every compressor used with any WAL file in the archive,
        # to be used during this recovery
        compressors = {}
        compression_manager = self.backup_manager.compression_manager
        # Fill xlogs and compressors maps from required_xlog_files
        for wal_info in required_xlog_files:
            hashdir = xlog.hash_dir(wal_info.name)
            xlogs[hashdir].append(wal_info)
            # If a compressor is required, make sure it exists in the cache
            if wal_info.compression is not None and \
                    wal_info.compression not in compressors:
                compressors[wal_info.compression] = \
                    compression_manager.get_compressor(
                        compression=wal_info.compression)

        rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            network_compression=self.config.network_compression)
        # If compression is used and this is a remote recovery, we need a
        # temporary directory where to spool uncompressed files,
        # otherwise we either decompress every WAL file in the local
        # destination, or we ship the uncompressed file remotely
        if compressors:
            if remote_command:
                # Decompress to a temporary spool directory
                wal_decompression_dest = tempfile.mkdtemp(
                    prefix='barman_xlog-')
            else:
                # Decompress directly to the destination directory
                wal_decompression_dest = wal_dest
            # Make sure wal_decompression_dest exists
            mkpath(wal_decompression_dest)
        else:
            # If no compression
            wal_decompression_dest = None
        if remote_command:
            # If remote recovery tell rsync to copy them remotely
            # add ':' prefix to mark it as remote
            wal_dest = ':%s' % wal_dest
        total_wals = sum(map(len, xlogs.values()))
        partial_count = 0
        for prefix in sorted(xlogs):
            batch_len = len(xlogs[prefix])
            partial_count += batch_len
            source_dir = os.path.join(self.config.wals_directory, prefix)
            _logger.info(
                "Starting copy of %s WAL files %s/%s from %s to %s",
                batch_len,
                partial_count,
                total_wals,
                xlogs[prefix][0],
                xlogs[prefix][-1])
            # If at least one compressed file has been found, activate
            # compression check and decompression for each WAL files
            if compressors:
                for segment in xlogs[prefix]:
                    dst_file = os.path.join(wal_decompression_dest,
                                            segment.name)
                    if segment.compression is not None:
                        compressors[segment.compression].decompress(
                            os.path.join(source_dir, segment.name),
                            dst_file)
                    else:
                        shutil.copy2(os.path.join(source_dir, segment.name),
                                     dst_file)
                if remote_command:
                    try:
                        # Transfer the WAL files
                        rsync.from_file_list(
                            list(segment.name for segment in xlogs[prefix]),
                            wal_decompression_dest, wal_dest)
                    except CommandFailedException as e:
                        msg = ("data transfer failure while copying WAL files "
                               "to directory '%s'") % (wal_dest[1:],)
                        raise DataTransferFailure.from_command_error(
                            'rsync', e, msg)

                    # Cleanup files after the transfer
                    for segment in xlogs[prefix]:
                        file_name = os.path.join(wal_decompression_dest,
                                                 segment.name)
                        try:
                            os.unlink(file_name)
                        except OSError as e:
                            output.warning(
                                "Error removing temporary file '%s': %s",
                                file_name, e)
            else:
                try:
                    rsync.from_file_list(
                        list(segment.name for segment in xlogs[prefix]),
                        "%s/" % os.path.join(self.config.wals_directory,
                                             prefix),
                        wal_dest)
                except CommandFailedException as e:
                    msg = "data transfer failure while copying WAL files " \
                          "to directory '%s'" % (wal_dest[1:],)
                    raise DataTransferFailure.from_command_error(
                        'rsync', e, msg)

        _logger.info("Finished copying %s WAL files.", total_wals)

        # Remove local decompression target directory if different from the
        # destination directory (it happens when compression is in use during a
        # remote recovery
        if wal_decompression_dest and wal_decompression_dest != wal_dest:
            shutil.rmtree(wal_decompression_dest)
    def basebackup_copy(self,
                        backup_info,
                        dest,
                        tablespaces=None,
                        remote_command=None,
                        safe_horizon=None):
        """
        Perform the actual copy of the base backup for recovery purposes

        :param barman.infofile.BackupInfo backup_info: the backup to recover
        :param str dest: the destination directory
        :param dict[str,str]|None tablespaces: a tablespace
            name -> location map (for relocation)
        :param str|None remote_command: default None. The remote command to
            recover the base backup, in case of remote backup.
        :param datetime.datetime|None safe_horizon: anything after this time
            has to be checked with checksum
        """

        # Dictionary for paths to be excluded from rsync
        exclude_and_protect = []

        # Set a ':' prefix to remote destinations
        dest_prefix = ''
        if remote_command:
            dest_prefix = ':'

        # Copy tablespaces applying bwlimit when necessary
        if backup_info.tablespaces:
            tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
            # Copy a tablespace at a time
            for tablespace in backup_info.tablespaces:
                # Apply bandwidth limit if requested
                bwlimit = self.config.bandwidth_limit
                if tablespaces_bw_limit and \
                        tablespace.name in tablespaces_bw_limit:
                    bwlimit = tablespaces_bw_limit[tablespace.name]
                # By default a tablespace goes in the same location where
                # it was on the source server when the backup was taken
                location = tablespace.location
                # If a relocation has been requested for this tablespace
                # use the user provided target directory
                if tablespaces and tablespace.name in tablespaces:
                    location = tablespaces[tablespace.name]
                # If the tablespace location is inside the data directory,
                # exclude and protect it from being deleted during
                # the data directory copy
                if location.startswith(dest):
                    exclude_and_protect.append(location[len(dest):])
                # Exclude and protect the tablespace from being deleted during
                # the data directory copy
                exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
                # Copy the tablespace using smart copy
                tb_rsync = RsyncPgData(
                    path=self.server.path,
                    ssh=remote_command,
                    bwlimit=bwlimit,
                    network_compression=self.config.network_compression,
                    check=True)
                try:
                    tb_rsync.smart_copy(
                        '%s/' % backup_info.get_data_directory(tablespace.oid),
                        dest_prefix + location, safe_horizon)
                except CommandFailedException as e:
                    msg = "data transfer failure on directory '%s'" % location
                    raise DataTransferFailure.from_command_error(
                        'rsync', e, msg)

        # Copy the pgdata directory
        rsync = RsyncPgData(
            path=self.server.path,
            ssh=remote_command,
            bwlimit=self.config.bandwidth_limit,
            exclude_and_protect=exclude_and_protect,
            network_compression=self.config.network_compression)
        try:
            rsync.smart_copy('%s/' % backup_info.get_data_directory(),
                             dest_prefix + dest, safe_horizon)
        except CommandFailedException as e:
            msg = "data transfer failure on directory '%s'" % dest
            raise DataTransferFailure.from_command_error('rsync', e, msg)