def test_retry(self, sleep_moc):
    """
    Test the retry method

    :param sleep_moc: mimic the sleep timer
    """
    # BackupManager setup
    backup_manager = build_backup_manager()
    backup_manager.config.basebackup_retry_times = 5
    backup_manager.config.basebackup_retry_sleep = 10
    f = Mock()

    # check for correct return value
    r = backup_manager.retry_backup_copy(f, 'test string')
    f.assert_called_with('test string')
    assert f.return_value == r

    # check for correct number of calls
    expected = Mock()
    f = Mock(side_effect=[DataTransferFailure('testException'), expected])
    r = backup_manager.retry_backup_copy(f, 'test string')
    assert f.call_count == 2

    # check for correct number of tries and invocations of sleep method
    sleep_moc.reset_mock()
    e = DataTransferFailure('testException')
    f = Mock(side_effect=[e, e, e, e, e, e])
    with pytest.raises(DataTransferFailure):
        backup_manager.retry_backup_copy(f, 'test string')
    assert sleep_moc.call_count == 5
    assert f.call_count == 6
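# Illustrative sketch (not part of Barman): the test above pins down the
# retry contract -- with basebackup_retry_times=5 the wrapped callable is
# invoked 6 times (one initial attempt plus 5 retries) and sleep() is called
# 5 times, once between attempts. A minimal, generic re-implementation of
# that contract, using only the standard library; it is NOT the real
# BackupManager.retry_backup_copy code.
import time


def retry_copy(func, *args, retry_times=5, retry_sleep=10,
               retry_on=Exception):
    """Call func(*args), retrying up to retry_times times on retry_on."""
    attempt = 0
    while True:
        try:
            return func(*args)
        except retry_on:
            if attempt == retry_times:
                # The last allowed attempt failed: re-raise to the caller
                raise
            attempt += 1
            time.sleep(retry_sleep)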
def _xlog_copy(self, required_xlog_files, wal_dest, remote_command):
    """
    Restore WAL segments

    :param required_xlog_files: list of all required WAL files
    :param wal_dest: the destination directory for xlog recovery
    :param remote_command: default None. The remote command to recover
        the xlog, in case of remote backup.
    """
    # List of required WAL files partitioned by containing directory
    xlogs = collections.defaultdict(list)
    # add '/' suffix to ensure it is a directory
    wal_dest = '%s/' % wal_dest
    # Map of every compressor used with any WAL file in the archive,
    # to be used during this recovery
    compressors = {}
    compression_manager = self.backup_manager.compression_manager
    # Fill xlogs and compressors maps from required_xlog_files
    for wal_info in required_xlog_files:
        hashdir = xlog.hash_dir(wal_info.name)
        xlogs[hashdir].append(wal_info)
        # If a compressor is required, make sure it exists in the cache
        if wal_info.compression is not None and \
                wal_info.compression not in compressors:
            compressors[wal_info.compression] = \
                compression_manager.get_compressor(
                    compression=wal_info.compression)

    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        network_compression=self.config.network_compression)
    # If compression is used and this is a remote recovery, we need a
    # temporary directory where to spool uncompressed files,
    # otherwise we either decompress every WAL file in the local
    # destination, or we ship the uncompressed file remotely
    if compressors:
        if remote_command:
            # Decompress to a temporary spool directory
            wal_decompression_dest = tempfile.mkdtemp(
                prefix='barman_xlog-')
        else:
            # Decompress directly to the destination directory
            wal_decompression_dest = wal_dest
        # Make sure wal_decompression_dest exists
        mkpath(wal_decompression_dest)
    else:
        # If no compression
        wal_decompression_dest = None

    if remote_command:
        # If remote recovery tell rsync to copy them remotely
        # add ':' prefix to mark it as remote
        wal_dest = ':%s' % wal_dest

    total_wals = sum(map(len, xlogs.values()))
    partial_count = 0
    for prefix in sorted(xlogs):
        batch_len = len(xlogs[prefix])
        partial_count += batch_len
        source_dir = os.path.join(self.config.wals_directory, prefix)
        _logger.info(
            "Starting copy of %s WAL files %s/%s from %s to %s",
            batch_len,
            partial_count,
            total_wals,
            xlogs[prefix][0],
            xlogs[prefix][-1])
        # If at least one compressed file has been found, activate
        # compression check and decompression for each WAL file
        if compressors:
            for segment in xlogs[prefix]:
                dst_file = os.path.join(wal_decompression_dest,
                                        segment.name)
                if segment.compression is not None:
                    compressors[segment.compression].decompress(
                        os.path.join(source_dir, segment.name),
                        dst_file)
                else:
                    shutil.copy2(os.path.join(source_dir, segment.name),
                                 dst_file)
            if remote_command:
                try:
                    # Transfer the WAL files
                    rsync.from_file_list(
                        list(segment.name for segment in xlogs[prefix]),
                        wal_decompression_dest,
                        wal_dest)
                except CommandFailedException as e:
                    msg = ("data transfer failure while copying WAL files "
                           "to directory '%s'") % (wal_dest[1:],)
                    raise DataTransferFailure.from_command_error(
                        'rsync', e, msg)

                # Cleanup files after the transfer
                for segment in xlogs[prefix]:
                    file_name = os.path.join(wal_decompression_dest,
                                             segment.name)
                    try:
                        os.unlink(file_name)
                    except OSError as e:
                        output.warning(
                            "Error removing temporary file '%s': %s",
                            file_name, e)
        else:
            try:
                rsync.from_file_list(
                    list(segment.name for segment in xlogs[prefix]),
                    "%s/" % os.path.join(self.config.wals_directory,
                                         prefix),
                    wal_dest)
            except CommandFailedException as e:
                msg = "data transfer failure while copying WAL files " \
                      "to directory '%s'" % (wal_dest[1:],)
                raise DataTransferFailure.from_command_error(
                    'rsync', e, msg)

    _logger.info("Finished copying %s WAL files.", total_wals)

    # Remove local decompression target directory if different from the
    # destination directory (it happens when compression is in use during
    # a remote recovery)
    if wal_decompression_dest and wal_decompression_dest != wal_dest:
        shutil.rmtree(wal_decompression_dest)
def _backup_copy(self, backup_info, dest, tablespaces=None,
                 remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    First, it copies one tablespace at a time, then the PGDATA directory.

    Bandwidth limitation, according to configuration, is applied in
    the process.

    TODO: manage configuration files if outside PGDATA.

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command
        to recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # Set a ':' prefix to remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Create the copy controller object, specific for rsync,
    # which will drive all the copy operations. Items to be
    # copied are added before executing the copy() method
    controller = RsyncCopyController(
        path=self.server.path,
        ssh_command=remote_command,
        network_compression=self.config.network_compression,
        safe_horizon=safe_horizon,
        retry_times=self.config.basebackup_retry_times,
        retry_sleep=self.config.basebackup_retry_sleep,
        workers=self.config.parallel_jobs,
    )

    # List of paths to be excluded and protected from the rsync copy
    exclude_and_protect = []

    # Process every tablespace
    if backup_info.tablespaces:
        for tablespace in backup_info.tablespaces:
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace
            # use the user provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect += [location[len(dest):]]
            # Exclude and protect the tablespace from being deleted during
            # the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Add the tablespace directory to the list of objects
            # to be copied by the controller
            controller.add_directory(
                label=tablespace.name,
                src='%s/' % backup_info.get_data_directory(tablespace.oid),
                dst=dest_prefix + location,
                bwlimit=self.config.get_bwlimit(tablespace),
                item_class=controller.TABLESPACE_CLASS)

    # Add the PGDATA directory to the list of objects to be copied
    # by the controller
    controller.add_directory(
        label='pgdata',
        src='%s/' % backup_info.get_data_directory(),
        dst=dest_prefix + dest,
        bwlimit=self.config.get_bwlimit(),
        exclude=[
            '/pg_log/*',
            '/pg_xlog/*',
            '/pg_wal/*',
            '/postmaster.pid',
            '/recovery.conf',
            '/tablespace_map',
        ],
        exclude_and_protect=exclude_and_protect,
        item_class=controller.PGDATA_CLASS)

    # TODO: Manage different location for configuration files
    # TODO: that were not within the data directory

    # Execute the copy
    try:
        controller.copy()
    # TODO: Improve the exception output
    except CommandFailedException as e:
        msg = "data transfer failure"
        raise DataTransferFailure.from_command_error('rsync', e, msg)
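# Illustrative sketch (not part of Barman): a worked example of the
# relocation and exclude_and_protect logic used above, with hypothetical
# names, paths and OID (none of them come from a real configuration).
dest = '/var/lib/pgsql/11/data'
tablespace_name, tablespace_oid = 'tbs_index', 16387
original_location = '/var/lib/pgsql/11/data/tbs_index'   # inside PGDATA
relocation_map = {'tbs_index': '/mnt/fast_disk/tbs_index'}

# Honour a relocation request, if any, otherwise keep the original location
location = relocation_map.get(tablespace_name, original_location)

exclude_and_protect = []
if location.startswith(dest):
    # Only needed while the tablespace still lives inside the data directory
    exclude_and_protect.append(location[len(dest):])
exclude_and_protect.append("/pg_tblspc/%s" % tablespace_oid)

print(exclude_and_protect)   # ['/pg_tblspc/16387'] (relocated outside PGDATA)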
def _backup_copy(self, backup_info, dest, tablespaces=None,
                 remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    First, it copies one tablespace at a time, then the PGDATA directory.

    Bandwidth limitation, according to configuration, is applied in
    the process.

    TODO: manage configuration files if outside PGDATA.

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command
        to recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # Set a ':' prefix to remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Create the copy controller object, specific for rsync,
    # which will drive all the copy operations. Items to be
    # copied are added before executing the copy() method
    controller = RsyncCopyController(
        path=self.server.path,
        ssh_command=remote_command,
        network_compression=self.config.network_compression,
        safe_horizon=safe_horizon,
        retry_times=self.config.basebackup_retry_times,
        retry_sleep=self.config.basebackup_retry_sleep,
    )

    # List of paths to be excluded and protected from the rsync copy
    exclude_and_protect = []

    # Process every tablespace
    if backup_info.tablespaces:
        for tablespace in backup_info.tablespaces:
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace
            # use the user provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect += [location[len(dest):]]
            # Exclude and protect the tablespace from being deleted during
            # the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Add the tablespace directory to the list of objects
            # to be copied by the controller
            controller.add_directory(
                label=tablespace.name,
                src='%s/' % backup_info.get_data_directory(tablespace.oid),
                dst=dest_prefix + location,
                bwlimit=self.config.get_bwlimit(tablespace),
                item_class=controller.TABLESPACE_CLASS
            )

    # Add the PGDATA directory to the list of objects to be copied
    # by the controller
    controller.add_directory(
        label='pgdata',
        src='%s/' % backup_info.get_data_directory(),
        dst=dest_prefix + dest,
        bwlimit=self.config.get_bwlimit(),
        exclude=[
            '/pg_xlog/*',
            '/pg_log/*',
            '/recovery.conf',
            '/postmaster.pid'],
        exclude_and_protect=exclude_and_protect,
        item_class=controller.PGDATA_CLASS
    )

    # TODO: Manage different location for configuration files
    # TODO: that were not within the data directory

    # Execute the copy
    try:
        controller.copy()
    # TODO: Improve the exception output
    except CommandFailedException as e:
        msg = "data transfer failure"
        raise DataTransferFailure.from_command_error(
            'rsync', e, msg)
def basebackup_copy(self, backup_info, dest, tablespaces=None,
                    remote_command=None, safe_horizon=None):
    """
    Perform the actual copy of the base backup for recovery purposes

    :param barman.infofile.BackupInfo backup_info: the backup to recover
    :param str dest: the destination directory
    :param dict[str,str]|None tablespaces: a tablespace
        name -> location map (for relocation)
    :param str|None remote_command: default None. The remote command
        to recover the base backup, in case of remote backup.
    :param datetime.datetime|None safe_horizon: anything after this time
        has to be checked with checksum
    """
    # List of paths to be excluded and protected from the rsync copy
    exclude_and_protect = []

    # Set a ':' prefix to remote destinations
    dest_prefix = ''
    if remote_command:
        dest_prefix = ':'

    # Copy tablespaces applying bwlimit when necessary
    if backup_info.tablespaces:
        tablespaces_bw_limit = self.config.tablespace_bandwidth_limit
        # Copy a tablespace at a time
        for tablespace in backup_info.tablespaces:
            # Apply bandwidth limit if requested
            bwlimit = self.config.bandwidth_limit
            if tablespaces_bw_limit and \
                    tablespace.name in tablespaces_bw_limit:
                bwlimit = tablespaces_bw_limit[tablespace.name]
            # By default a tablespace goes in the same location where
            # it was on the source server when the backup was taken
            location = tablespace.location
            # If a relocation has been requested for this tablespace
            # use the user provided target directory
            if tablespaces and tablespace.name in tablespaces:
                location = tablespaces[tablespace.name]
            # If the tablespace location is inside the data directory,
            # exclude and protect it from being deleted during
            # the data directory copy
            if location.startswith(dest):
                exclude_and_protect.append(location[len(dest):])
            # Exclude and protect the tablespace from being deleted during
            # the data directory copy
            exclude_and_protect.append("/pg_tblspc/%s" % tablespace.oid)
            # Copy the tablespace using smart copy
            tb_rsync = RsyncPgData(
                path=self.server.path,
                ssh=remote_command,
                bwlimit=bwlimit,
                network_compression=self.config.network_compression,
                check=True)
            try:
                tb_rsync.smart_copy(
                    '%s/' % backup_info.get_data_directory(tablespace.oid),
                    dest_prefix + location,
                    safe_horizon)
            except CommandFailedException as e:
                msg = "data transfer failure on directory '%s'" % location
                raise DataTransferFailure.from_command_error(
                    'rsync', e, msg)

    # Copy the pgdata directory
    rsync = RsyncPgData(
        path=self.server.path,
        ssh=remote_command,
        bwlimit=self.config.bandwidth_limit,
        exclude_and_protect=exclude_and_protect,
        network_compression=self.config.network_compression)
    try:
        rsync.smart_copy('%s/' % backup_info.get_data_directory(),
                         dest_prefix + dest,
                         safe_horizon)
    except CommandFailedException as e:
        msg = "data transfer failure on directory '%s'" % dest
        raise DataTransferFailure.from_command_error('rsync', e, msg)
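# Illustrative sketch (not part of Barman): every copy method above marks a
# remote destination by prepending ':' (rsync's remote-path syntax, with the
# host supplied by the ssh command) and strips it again (e.g. wal_dest[1:])
# when building error messages. A trivial stand-alone illustration of that
# convention:
def as_rsync_dest(path, remote_command=None):
    # A bare path is local; ':' + path is remote for rsync-over-ssh
    return (':' if remote_command else '') + path


print(as_rsync_dest('/restore/pgdata'))                       # /restore/pgdata
print(as_rsync_dest('/restore/pgdata', 'ssh postgres@db'))    # :/restore/pgdata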