def xlog_copy(self, required_xlog_files, wal_dest, remote_command):
    """
    Restore WAL segments

    :param required_xlog_files: list of all required WAL files
    :param wal_dest: the destination directory for xlog recovery
    :param remote_command: default None. The remote command to recover
        the xlog, in case of remote backup.
    """
    # List of required WAL files partitioned by containing directory
    xlogs = collections.defaultdict(list)
    # Add '/' suffix to ensure it is a directory
    wal_dest = '%s/' % wal_dest
    # Map of every compressor used with any WAL file in the archive,
    # to be used during this recovery
    compressors = {}
    compression_manager = self.backup_manager.compression_manager
    # Fill the xlogs and compressors maps from required_xlog_files
    for wal_info in required_xlog_files:
        hashdir = xlog.hash_dir(wal_info.name)
        xlogs[hashdir].append(wal_info)
        # If a compressor is required, make sure it exists in the cache
        if wal_info.compression is not None and \
                wal_info.compression not in compressors:
            compressors[wal_info.compression] = \
                compression_manager.get_compressor(
                    compression=wal_info.compression)

    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        network_compression=self.config.network_compression)
    # If compression is used and this is a remote recovery, we need a
    # temporary directory where to spool uncompressed files;
    # otherwise we either decompress every WAL file in the local
    # destination, or we ship the uncompressed files remotely
    if compressors:
        if remote_command:
            # Decompress to a temporary spool directory
            wal_decompression_dest = tempfile.mkdtemp(
                prefix='barman_xlog-')
        else:
            # Decompress directly to the destination directory
            wal_decompression_dest = wal_dest
        # Make sure wal_decompression_dest exists
        mkpath(wal_decompression_dest)
    else:
        # No compression is in use
        wal_decompression_dest = None
    if remote_command:
        # If remote recovery, tell rsync to copy them remotely:
        # add ':' prefix to mark it as remote
        wal_dest = ':%s' % wal_dest
    total_wals = sum(map(len, xlogs.values()))
    partial_count = 0
    for prefix in sorted(xlogs):
        batch_len = len(xlogs[prefix])
        partial_count += batch_len
        source_dir = os.path.join(self.config.wals_directory, prefix)
        _logger.info(
            "Starting copy of %s WAL files %s/%s from %s to %s",
            batch_len,
            partial_count,
            total_wals,
            xlogs[prefix][0],
            xlogs[prefix][-1])
        # If at least one compressed file has been found, activate
        # compression check and decompression for each WAL file
        if compressors:
            for segment in xlogs[prefix]:
                dst_file = os.path.join(wal_decompression_dest,
                                        segment.name)
                if segment.compression is not None:
                    compressors[segment.compression].decompress(
                        os.path.join(source_dir, segment.name),
                        dst_file)
                else:
                    shutil.copy2(os.path.join(source_dir, segment.name),
                                 dst_file)
            if remote_command:
                try:
                    # Transfer the WAL files
                    rsync.from_file_list(
                        list(segment.name
                             for segment in xlogs[prefix]),
                        wal_decompression_dest,
                        wal_dest)
                except CommandFailedException as e:
                    msg = ("data transfer failure while copying WAL "
                           "files to directory '%s'") % (wal_dest[1:],)
                    raise DataTransferFailure.from_rsync_error(e, msg)

                # Cleanup files after the transfer
                for segment in xlogs[prefix]:
                    file_name = os.path.join(wal_decompression_dest,
                                             segment.name)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)
        else:
            try:
                rsync.from_file_list(
                    list(segment.name for segment in xlogs[prefix]),
                    "%s/" % os.path.join(self.config.wals_directory,
                                         prefix),
                    wal_dest)
            except CommandFailedException as e:
                msg = "data transfer failure while copying WAL files " \
                      "to directory '%s'" % (wal_dest[1:],)
                raise DataTransferFailure.from_rsync_error(e, msg)

    _logger.info("Finished copying %s WAL files.", total_wals)

    # Remove the local decompression target directory if it is different
    # from the destination directory (this happens when compression is
    # in use during a remote recovery)
    if wal_decompression_dest and wal_decompression_dest != wal_dest:
        shutil.rmtree(wal_decompression_dest)
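
# A minimal, hypothetical sketch (not part of Barman): how xlog_copy groups
# the required WAL segments into per-directory batches before copying. In
# the Barman archive, a segment such as '000000010000000000000002' is stored
# under the hash directory given by its first 16 characters (timeline + log
# id), which is what barman.xlog.hash_dir computes for regular segments.
def _example_partition_wals(wal_names):
    import collections
    batches = collections.defaultdict(list)
    for name in wal_names:
        # Simplified stand-in for xlog.hash_dir(name)
        batches[name[:16]].append(name)
    return dict(batches)

# Usage (hypothetical):
#   _example_partition_wals(['000000010000000000000002',
#                            '000000010000000000000003'])
#   returns {'0000000100000000': ['000000010000000000000002',
#                                 '000000010000000000000003']}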
def basebackup_copy(self, backup_info, dest, tablespaces=None,
                    remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command to
        recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # List of paths to be excluded from rsync
    exclude_and_protect = []

    # Set a ':' prefix for remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace,
            # use the user-provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect.append(location[len(dest):])
            # Exclude and protect the tablespace from being deleted during
            # the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the tablespace using smart copy
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    '%s/' % backup_info.get_data_directory(
                        tablespace.oid),
                    dest_prefix + location,
                    safe_horizon)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % location
                raise DataTransferFailure.from_rsync_error(e, msg)

    # Copy the pgdata directory
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy(
            '%s/' % backup_info.get_data_directory(),
            dest_prefix + dest,
            safe_horizon)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % dest
        raise DataTransferFailure.from_rsync_error(e, msg)
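
# Hypothetical sketch (helper name and tuple layout are assumptions, not
# Barman API): the relocation and protection logic basebackup_copy applies
# to each tablespace. A tablespace keeps its original location unless the
# user supplied a relocation map, and any location nested inside the
# recovery destination is returned as a relative path to be added to the
# rsync exclude-and-protect list, so the PGDATA copy does not delete it.
def _example_tablespace_target(name, original_location, dest,
                               relocation_map=None):
    location = original_location
    if relocation_map and name in relocation_map:
        location = relocation_map[name]
    protected = None
    if location.startswith(dest):
        protected = location[len(dest):]
    return location, protected

# Usage (hypothetical):
#   _example_tablespace_target('tbs1', '/srv/tbs1', '/var/lib/pgsql/data',
#                              {'tbs1': '/var/lib/pgsql/data/tbs1'})
#   returns ('/var/lib/pgsql/data/tbs1', '/tbs1')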
def backup_copy(self, backup_info):
    """
    Perform the actual copy of the backup using Rsync.

    First, it copies one tablespace at a time, then the PGDATA directory,
    and finally configuration files (if outside PGDATA).
    Bandwidth limitation, according to configuration, is applied in
    the process.
    This method is the core of base backup copy using Rsync+Ssh.

    :param barman.infofile.BackupInfo backup_info: backup information
    """
    # List of paths to be ignored by Rsync
    exclude_and_protect = []

    # Retrieve the previous backup metadata, then set safe_horizon
    previous_backup = self.backup_manager.get_previous_backup(
        backup_info.backup_id)
    if previous_backup:
        # safe_horizon is a tz-aware timestamp because the BackupInfo
        # class ensures it
        safe_horizon = previous_backup.begin_time
    else:
        # If no previous backup is present, safe_horizon is set to None
        safe_horizon = None

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            self.current_action = "copying tablespace '%s'" % \
                tablespace.name
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            if bwlimit:
                self.current_action += (" with bwlimit '%d'" % bwlimit)
            _logger.debug(self.current_action)
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being copied twice during
            # the data directory copy
            if tablespace.location.startswith(backup_info.pgdata):
                exclude_and_protect.append(
                    tablespace.location[len(backup_info.pgdata):])
            # Make sure the destination directory exists in order for
            # smart copy to detect that no file is present there
            tablespace_dest = backup_info.get_data_directory(
                tablespace.oid)
            mkpath(tablespace_dest)
            # Exclude and protect the tablespace from being copied again
            # during the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the backup using smart_copy, trying to reuse the
            # tablespace of the previous backup if incremental is active
            ref_dir = self._reuse_dir(previous_backup, tablespace.oid)
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=self.ssh_command,
                ssh_options=self.ssh_options,
                args=self._reuse_args(ref_dir),
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    ':%s/' % tablespace.location,
                    tablespace_dest,
                    safe_horizon,
                    ref_dir)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % \
                      backup_info.get_data_directory(tablespace.oid)
                raise DataTransferFailure.from_rsync_error(e, msg)

    # Make sure the destination directory exists in order for smart copy
    # to detect that no file is present there
    backup_dest = backup_info.get_data_directory()
    mkpath(backup_dest)

    # Copy the PGDATA, trying to reuse the data dir
    # of the previous backup if incremental is active
    ref_dir = self._reuse_dir(previous_backup)
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=self.ssh_command,
        ssh_options=self.ssh_options,
        args=self._reuse_args(ref_dir),
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy(':%s/' % backup_info.pgdata, backup_dest,
                         safe_horizon, ref_dir)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % \
              backup_info.pgdata
        raise DataTransferFailure.from_rsync_error(e, msg)

    # At last, copy pg_control
    try:
        rsync(':%s/global/pg_control' % (backup_info.pgdata,),
              '%s/global/pg_control' % (backup_dest,))
    except CommandFailedException as e:
        msg = "data transfer failure on file '%s/global/pg_control'" % \
              backup_info.pgdata
        raise DataTransferFailure.from_rsync_error(e, msg)

    # Copy configuration files (if not inside PGDATA)
    self.current_action = "copying configuration files"
    _logger.debug(self.current_action)
    for key in ('config_file', 'hba_file', 'ident_file'):
        cf = getattr(backup_info, key, None)
        if cf:
            assert isinstance(cf, str)
            # Consider only files that reside outside of the original
            # PGDATA directory
            if cf.startswith(backup_info.pgdata):
                self.current_action = \
                    "skipping %s as contained in %s directory" % (
                        key, backup_info.pgdata)
                _logger.debug(self.current_action)
                continue
            self.current_action = "copying %s as outside %s directory" % (
                key, backup_info.pgdata)
            _logger.info(self.current_action)
            try:
                rsync(':%s' % cf, backup_dest)
            except CommandFailedException as e:
                ret_code = e.args[0]['ret']
                msg = "data transfer failure on file '%s'" % cf
                if 'ident_file' == key and ret_code == 23:
                    # If the ident file is missing,
                    # it isn't an error condition for PostgreSQL.
                    # Barman is consistent with this behavior.
                    output.warning(msg, log=True)
                    continue
                else:
                    raise DataTransferFailure.from_rsync_error(e, msg)

    # Check for any include directives in PostgreSQL configuration.
    # Currently, include directives are not supported for files that
    # reside outside PGDATA. These files must be manually backed up.
    # Barman will emit a warning and list those files.
    if backup_info.included_files:
        filtered_files = [
            included_file
            for included_file in backup_info.included_files
            if not included_file.startswith(backup_info.pgdata)
        ]
        if len(filtered_files) > 0:
            output.warning(
                "The usage of include directives is not supported "
                "for files that reside outside PGDATA.\n"
                "Please manually backup the following files:\n"
                "\t%s\n",
                "\n\t".join(filtered_files))
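
# Hypothetical sketch (helper name and tuple layout are assumptions): how
# backup_copy builds the rsync exclude-and-protect list so each tablespace
# is copied exactly once, by its own rsync run rather than again through
# the PGDATA copy.
def _example_exclude_and_protect(pgdata, tablespaces):
    # `tablespaces` is assumed to be an iterable of (name, oid, location)
    paths = []
    for _name, oid, location in tablespaces:
        if location.startswith(pgdata):
            # Tablespace physically nested inside PGDATA
            paths.append(location[len(pgdata):])
        # Always protect the pg_tblspc entry for this tablespace
        paths.append("/pg_tblspc/%s" % oid)
    return paths

# Usage (hypothetical):
#   _example_exclude_and_protect(
#       '/var/lib/pgsql/data',
#       [('tbs1', 16384, '/var/lib/pgsql/data/tbs1_dir')])
#   returns ['/tbs1_dir', '/pg_tblspc/16384']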