def tar_one_file(self, *, temp_dir, chunk_path, files_to_backup, callback_queue,
                 filetype="basebackup_chunk", extra_metadata=None):
    start_time = time.monotonic()

    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]
    else:
        rsa_public_key = None

    with NamedTemporaryFile(dir=temp_dir, prefix=os.path.basename(chunk_path), suffix=".tmp") as raw_output_obj:
        # pylint: disable=bad-continuation
        with rohmufile.file_writer(
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                fileobj=raw_output_obj) as output_obj:
            with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
                self.write_files_to_tar(files=files_to_backup, tar=output_tar)
            input_size = output_obj.tell()

        result_size = raw_output_obj.tell()
        # Make the file persist over the with-block with this hardlink
        os.link(raw_output_obj.name, chunk_path)

    rohmufile.log_compression_result(
        encrypted=bool(encryption_key_id),
        elapsed=time.monotonic() - start_time,
        original_size=input_size,
        result_size=result_size,
        source_name="$PGDATA files ({})".format(len(files_to_backup)),
        log_func=self.log.info,
    )

    metadata = {
        "compression-algorithm": self.config["compression"]["algorithm"],
        "encryption-key-id": encryption_key_id,
        "format": "pghoard-bb-v2",
        "original-file-size": input_size,
    }
    if extra_metadata:
        metadata.update(extra_metadata)
    self.transfer_queue.put({
        "callback_queue": callback_queue,
        "file_size": result_size,
        "filetype": filetype,
        "local_path": chunk_path,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })

    # Get the name of the chunk and the name of the parent directory (ie backup "name")
    chunk_name = "/".join(chunk_path.split("/")[-2:])
    return chunk_name, input_size, result_size
def write_backup_files(what):
    for bb, bb_data in what.items():
        wal_start, hexdigests = bb_data
        if bb:
            bb_path = os.path.join(basebackup_storage_path, bb)
            date_parts = [int(part) for part in bb.replace("_", "-").split("-")]
            start_time = datetime.datetime(*date_parts, tzinfo=datetime.timezone.utc)
            metadata = {
                "manifest": {
                    "snapshot_result": {
                        "state": {
                            "files": [{"relative_path": h, "hexdigest": h} for h in hexdigests]
                        }
                    }
                }
            }
            mtime = time.time()
            blob = io.BytesIO(common.json_encode(metadata, binary=True))
            ti = tarfile.TarInfo(name=".pghoard_tar_metadata.json")
            ti.size = len(blob.getbuffer())
            ti.mtime = mtime
            with open(bb_path, "wb") as fp:
                with rohmufile.file_writer(compression_algorithm="snappy", compression_level=0,
                                           fileobj=fp) as output_obj:
                    with tarfile.TarFile(fileobj=output_obj, mode="w") as tar:
                        tar.addfile(ti, blob)
                    input_size = output_obj.tell()

            for h in hexdigests:
                with open(Path(basebackup_delta_path) / h, "w") as digest_file, \
                        open(Path(basebackup_delta_path) / (h + ".metadata"), "w") as digest_meta_file:
                    json.dump({}, digest_file)
                    json.dump({}, digest_meta_file)

            with open(bb_path + ".metadata", "w") as fp:
                json.dump({
                    "start-wal-segment": wal_start,
                    "start-time": start_time.isoformat(),
                    "format": BaseBackupFormat.delta_v1,
                    "compression-algorithm": "snappy",
                    "original-file-size": input_size,
                }, fp)
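# A minimal usage sketch for the helper above, assuming basebackup_storage_path
# and basebackup_delta_path are already set up by the enclosing test fixture.
# The backup name (parsed into date parts by the helper) and the hexdigests
# below are hypothetical:
#
#     write_backup_files({
#         "2022-01-01_0": ("000000010000000000000002", ["abc123", "def456"]),
#     })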
def run_local_tar_basebackup(self):
    _, compressed_basebackup = self.get_paths_for_backup(self.basebackup_path)

    compression_algorithm = self.config["compression"]["algorithm"]
    compression_level = self.config["compression"]["level"]
    rsa_public_key = None
    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]

    self.log.debug("Connecting to database to start backup process")
    connection_string = connection_string_using_pgpass(self.connection_info)
    with psycopg2.connect(connection_string) as db_conn:
        cursor = db_conn.cursor()
        cursor.execute("SELECT pg_start_backup(%s)", [BASEBACKUP_NAME])
        try:
            cursor.execute("SELECT setting FROM pg_settings WHERE name='data_directory'")
            pgdata = cursor.fetchone()[0]

            # Look up tablespaces and resolve their current filesystem locations
            cursor.execute("SELECT oid, spcname FROM pg_tablespace WHERE spcname NOT IN ('pg_default', 'pg_global')")
            tablespaces = {
                spcname: {
                    "path": os.readlink(os.path.join(pgdata, "pg_tblspc", str(oid))),
                    "oid": oid,
                }
                for oid, spcname in cursor.fetchall()
            }

            with open(os.path.join(pgdata, "backup_label"), "rb") as fp:
                start_wal_segment, backup_start_time = self.parse_backup_label(fp.read())

            self.log.info("Starting to backup %r to %r", pgdata, compressed_basebackup)
            start_time = time.monotonic()
            with NamedTemporaryFile(prefix=compressed_basebackup, suffix=".tmp-compress") as raw_output_obj:
                with rohmufile.file_writer(fileobj=raw_output_obj,
                                           compression_algorithm=compression_algorithm,
                                           compression_level=compression_level,
                                           rsa_public_key=rsa_public_key) as output_obj:
                    with tarfile.open(fileobj=output_obj, mode="w|") as output_tar:
                        self.write_files_to_tar(pgdata=pgdata, tablespaces=tablespaces, tar=output_tar)
                    input_size = output_obj.tell()

                os.link(raw_output_obj.name, compressed_basebackup)
                result_size = raw_output_obj.tell()

            rohmufile.log_compression_result(
                elapsed=time.monotonic() - start_time,
                encrypted=bool(rsa_public_key),
                log_func=self.log.info,
                original_size=input_size,
                result_size=result_size,
                source_name=pgdata,
            )
        finally:
            db_conn.rollback()
            cursor.execute("SELECT pg_stop_backup()")

    metadata = {
        "compression-algorithm": compression_algorithm,
        "encryption-key-id": encryption_key_id,
        "format": "pghoard-bb-v1",
        "original-file-size": input_size,
        "pg-version": self.pg_version_server,
        "start-time": backup_start_time,
        "start-wal-segment": start_wal_segment,
    }
    for spcname, spcinfo in tablespaces.items():
        metadata["tablespace-name-{}".format(spcinfo["oid"])] = spcname
        metadata["tablespace-path-{}".format(spcinfo["oid"])] = spcinfo["path"]
    self.transfer_queue.put({
        "callback_queue": self.callback_queue,
        "file_size": result_size,
        "filetype": "basebackup",
        "local_path": compressed_basebackup,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })
def run_local_tar_basebackup(self):
    pgdata = self.config["backup_sites"][self.site]["pg_data_directory"]
    if not os.path.isdir(pgdata):
        raise errors.InvalidConfigurationError("pg_data_directory {!r} does not exist".format(pgdata))

    temp_basebackup_dir, compressed_basebackup = self.get_paths_for_backup(self.basebackup_path)
    compression_algorithm = self.config["compression"]["algorithm"]
    compression_level = self.config["compression"]["level"]
    rsa_public_key = None
    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]

    self.log.debug("Connecting to database to start backup process")
    connection_string = connection_string_using_pgpass(self.connection_info)
    with psycopg2.connect(connection_string) as db_conn:
        cursor = db_conn.cursor()

        if self.pg_version_server >= 90600:
            # We'll always use the non-exclusive backup mode on 9.6 and newer
            cursor.execute("SELECT pg_start_backup(%s, false, false)", [BASEBACKUP_NAME])
            backup_label = None
            backup_mode = "non-exclusive"
        else:
            # On older versions, first check if we're in recovery, and find out the version of a possibly
            # installed pgespresso extension.  We use pgespresso's backup control functions when they're
            # available, and require them in case we're running on a replica.  We also make sure the
            # extension version is 1.2 or newer to prevent crashing when using tablespaces.
            cursor.execute("SELECT pg_is_in_recovery(), "
                           " (SELECT extversion FROM pg_extension WHERE extname = 'pgespresso')")
            in_recovery, pgespresso_version = cursor.fetchone()
            if in_recovery and (not pgespresso_version or pgespresso_version < "1.2"):
                raise errors.InvalidConfigurationError("pgespresso version 1.2 or higher must be installed "
                                                       "to take `local-tar` backups from a replica")

            if pgespresso_version and pgespresso_version >= "1.2":
                cursor.execute("SELECT pgespresso_start_backup(%s, false)", [BASEBACKUP_NAME])
                backup_label = cursor.fetchone()[0]
                backup_mode = "pgespresso"
            else:
                try:
                    cursor.execute("SELECT pg_start_backup(%s)", [BASEBACKUP_NAME])
                except psycopg2.OperationalError as ex:
                    self.log.warning("Exclusive pg_start_backup() failed: %s: %s", ex.__class__.__name__, ex)
                    db_conn.rollback()
                    if "a backup is already in progress" not in str(ex):
                        raise
                    self.log.info("Calling pg_stop_backup() and retrying")
                    cursor.execute("SELECT pg_stop_backup()")
                    cursor.execute("SELECT pg_start_backup(%s)", [BASEBACKUP_NAME])

                with open(os.path.join(pgdata, "backup_label"), "r") as fp:
                    backup_label = fp.read()
                backup_mode = "legacy"

        backup_stopped = False
        try:
            # Look up tablespaces and resolve their current filesystem locations
            cursor.execute("SELECT oid, spcname FROM pg_tablespace WHERE spcname NOT IN ('pg_default', 'pg_global')")
            tablespaces = {
                spcname: {
                    "path": os.readlink(os.path.join(pgdata, "pg_tblspc", str(oid))),
                    "oid": oid,
                }
                for oid, spcname in cursor.fetchall()
            }
            db_conn.commit()

            self.log.info("Starting to backup %r to %r", pgdata, compressed_basebackup)
            start_time = time.monotonic()
            with NamedTemporaryFile(dir=temp_basebackup_dir, prefix="data.", suffix=".tmp-compress") as raw_output_obj:
                with rohmufile.file_writer(fileobj=raw_output_obj,
                                           compression_algorithm=compression_algorithm,
                                           compression_level=compression_level,
                                           rsa_public_key=rsa_public_key) as output_obj:
                    with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
                        self.write_init_entries_to_tar(pgdata=pgdata, tablespaces=tablespaces, tar=output_tar)
                        files = self.find_files_to_backup(pgdata=pgdata, tablespaces=tablespaces)  # NOTE: generator
                        self.write_files_to_tar(files=files, tar=output_tar)
                        self.write_pg_control_to_tar(pgdata=pgdata, tar=output_tar)

                        # Call the stop backup functions now to get backup label for 9.6+ non-exclusive backups
                        if backup_mode == "non-exclusive":
                            cursor.execute("SELECT labelfile FROM pg_stop_backup(false)")
                            backup_label = cursor.fetchone()[0]
                        elif backup_mode == "pgespresso":
                            cursor.execute("SELECT pgespresso_stop_backup(%s)", [backup_label])
                        else:
                            cursor.execute("SELECT pg_stop_backup()")
                        db_conn.commit()
                        backup_stopped = True

                        backup_label_data = backup_label.encode("utf-8")
                        self.write_backup_label_to_tar(tar=output_tar, backup_label=backup_label_data)
                    input_size = output_obj.tell()

                os.link(raw_output_obj.name, compressed_basebackup)
                result_size = raw_output_obj.tell()

            rohmufile.log_compression_result(
                elapsed=time.monotonic() - start_time,
                encrypted=bool(rsa_public_key),
                log_func=self.log.info,
                original_size=input_size,
                result_size=result_size,
                source_name=pgdata,
            )
        finally:
            db_conn.rollback()
            if not backup_stopped:
                if backup_mode == "non-exclusive":
                    cursor.execute("SELECT pg_stop_backup(false)")
                elif backup_mode == "pgespresso":
                    cursor.execute("SELECT pgespresso_stop_backup(%s)", [backup_label])
                else:
                    cursor.execute("SELECT pg_stop_backup()")
            db_conn.commit()

        backup_end_time, backup_end_wal_segment = self.get_backup_end_time_and_segment(db_conn, backup_mode)

    backup_start_wal_segment, backup_start_time = self.parse_backup_label(backup_label_data)
    metadata = {
        "compression-algorithm": compression_algorithm,
        "encryption-key-id": encryption_key_id,
        "end-time": backup_end_time,
        "end-wal-segment": backup_end_wal_segment,
        "format": "pghoard-bb-v1",
        "original-file-size": input_size,
        "pg-version": self.pg_version_server,
        "start-time": backup_start_time,
        "start-wal-segment": backup_start_wal_segment,
    }
    for spcname, spcinfo in tablespaces.items():
        metadata["tablespace-name-{}".format(spcinfo["oid"])] = spcname
        metadata["tablespace-path-{}".format(spcinfo["oid"])] = spcinfo["path"]
    self.transfer_queue.put({
        "callback_queue": self.callback_queue,
        "file_size": result_size,
        "filetype": "basebackup",
        "local_path": compressed_basebackup,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })
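# A hedged sketch of what the get_backup_end_time_and_segment() helper used
# above could look like.  Assumptions: the real pghoard helper may differ and
# may additionally force a WAL switch so the segment containing the backup end
# gets archived; the 9.x "xlog"-named functions are used here and cannot be
# called while in recovery:
def get_backup_end_time_and_segment(self, db_conn, backup_mode):
    cursor = db_conn.cursor()
    cursor.execute("SELECT now(), pg_is_in_recovery()")
    backup_end_time, in_recovery = cursor.fetchone()
    backup_end_wal_segment = None
    if not in_recovery:
        # Name of the WAL segment containing the current insert location
        cursor.execute("SELECT pg_xlogfile_name(pg_current_xlog_location())")
        backup_end_wal_segment = cursor.fetchone()[0]
    db_conn.commit()
    return backup_end_time, backup_end_wal_segment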
def tar_one_file(
    self,
    *,
    temp_dir,
    chunk_path,
    files_to_backup,
    callback_queue,
    filetype="basebackup_chunk",
    extra_metadata=None
):
    start_time = time.monotonic()

    with NamedTemporaryFile(dir=temp_dir, prefix=os.path.basename(chunk_path), suffix=".tmp") as raw_output_obj:
        # pylint: disable=bad-continuation
        with rohmufile.file_writer(
            compression_algorithm=self.compression_data.algorithm,
            compression_level=self.compression_data.level,
            compression_threads=self.site_config["basebackup_compression_threads"],
            rsa_public_key=self.encryption_data.rsa_public_key,
            fileobj=raw_output_obj
        ) as output_obj:
            with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
                self.write_files_to_tar(files=files_to_backup, tar=output_tar)
            input_size = output_obj.tell()

        result_size = raw_output_obj.tell()
        # Make the file persist over the with-block with this hardlink
        os.link(raw_output_obj.name, chunk_path)

    rohmufile.log_compression_result(
        encrypted=bool(self.encryption_data.encryption_key_id),
        elapsed=time.monotonic() - start_time,
        original_size=input_size,
        result_size=result_size,
        source_name="$PGDATA files ({})".format(len(files_to_backup)),
        log_func=self.log.info,
    )

    size_ratio = result_size / input_size
    self.metrics.gauge(
        "pghoard.compressed_size_ratio",
        size_ratio,
        tags={
            "algorithm": self.compression_data.algorithm,
            "site": self.site,
            "type": "basebackup",
        }
    )

    metadata = {
        "compression-algorithm": self.compression_data.algorithm,
        "encryption-key-id": self.encryption_data.encryption_key_id,
        "format": BaseBackupFormat.v2,
        "original-file-size": input_size,
        "host": socket.gethostname(),
    }
    if extra_metadata:
        metadata.update(extra_metadata)
    self.transfer_queue.put({
        "callback_queue": callback_queue,
        "file_size": result_size,
        "filetype": filetype,
        "local_path": chunk_path,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })

    # Get the name of the chunk and the name of the parent directory (ie backup "name")
    chunk_name = "/".join(chunk_path.split("/")[-2:])
    return chunk_name, input_size, result_size
def tar_one_file(self, *, temp_dir, chunk_path, files_to_backup, callback_queue,
                 filetype="basebackup_chunk", extra_metadata=None):
    start_time = time.monotonic()

    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]
    else:
        rsa_public_key = None

    with NamedTemporaryFile(dir=temp_dir, prefix=os.path.basename(chunk_path), suffix=".tmp") as raw_output_obj:
        # pylint: disable=bad-continuation
        with rohmufile.file_writer(
                compression_algorithm=self.config["compression"]["algorithm"],
                compression_level=self.config["compression"]["level"],
                rsa_public_key=rsa_public_key,
                fileobj=raw_output_obj) as output_obj:
            with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
                self.write_files_to_tar(files=files_to_backup, tar=output_tar)
            input_size = output_obj.tell()

        result_size = raw_output_obj.tell()
        # Make the file persist over the with-block with this hardlink
        os.link(raw_output_obj.name, chunk_path)

    rohmufile.log_compression_result(
        encrypted=bool(encryption_key_id),
        elapsed=time.monotonic() - start_time,
        original_size=input_size,
        result_size=result_size,
        source_name="$PGDATA files ({})".format(len(files_to_backup)),
        log_func=self.log.info,
    )

    size_ratio = result_size / input_size
    self.metrics.gauge(
        "pghoard.compressed_size_ratio",
        size_ratio,
        tags={
            "algorithm": self.config["compression"]["algorithm"],
            "site": self.site,
            "type": "basebackup",
        }
    )

    metadata = {
        "compression-algorithm": self.config["compression"]["algorithm"],
        "encryption-key-id": encryption_key_id,
        "format": "pghoard-bb-v2",
        "original-file-size": input_size,
        "host": socket.gethostname(),
    }
    if extra_metadata:
        metadata.update(extra_metadata)
    self.transfer_queue.put({
        "callback_queue": callback_queue,
        "file_size": result_size,
        "filetype": filetype,
        "local_path": chunk_path,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })

    # Get the name of the chunk and the name of the parent directory (ie backup "name")
    chunk_name = "/".join(chunk_path.split("/")[-2:])
    return chunk_name, input_size, result_size
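# A self-contained sketch of the compress-then-hardlink pattern the chunk
# writers above rely on: compress a tar stream through rohmufile.file_writer
# into a NamedTemporaryFile, then hardlink the temp file to its final name so
# it survives the with-block.  Assumptions: rohmu is importable as
# pghoard.rohmu (newer releases ship it as a standalone "rohmu" package), the
# "snappy" codec is available, and compress_directory_to() plus its paths are
# illustrative names, not pghoard API:
import os
import tarfile
from tempfile import NamedTemporaryFile

from pghoard.rohmu import rohmufile


def compress_directory_to(target_path, source_dir):
    with NamedTemporaryFile(dir=os.path.dirname(target_path), suffix=".tmp") as raw:
        with rohmufile.file_writer(compression_algorithm="snappy", compression_level=0,
                                   fileobj=raw) as out:
            with tarfile.TarFile(fileobj=out, mode="w") as tar:
                tar.add(source_dir, arcname=".")
            uncompressed_size = out.tell()
        # Hardlink before the with-block closes so the data persists at
        # target_path even though the temp file itself gets unlinked
        os.link(raw.name, target_path)
        return uncompressed_size, raw.tell()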
def run_local_tar_basebackup(self):
    pgdata = self.config["backup_sites"][self.site]["pg_data_directory"]
    if not os.path.isdir(pgdata):
        raise errors.InvalidConfigurationError("pg_data_directory {!r} does not exist".format(pgdata))

    temp_basebackup_dir, compressed_basebackup = self.get_paths_for_backup(self.basebackup_path)
    compression_algorithm = self.config["compression"]["algorithm"]
    compression_level = self.config["compression"]["level"]
    rsa_public_key = None
    encryption_key_id = self.config["backup_sites"][self.site]["encryption_key_id"]
    if encryption_key_id:
        rsa_public_key = self.config["backup_sites"][self.site]["encryption_keys"][encryption_key_id]["public"]

    self.log.debug("Connecting to database to start backup process")
    connection_string = connection_string_using_pgpass(self.connection_info)
    with psycopg2.connect(connection_string) as db_conn:
        cursor = db_conn.cursor()

        if self.pg_version_server >= 90600:
            # We'll always use the non-exclusive backup mode on 9.6 and newer
            cursor.execute("SELECT pg_start_backup(%s, false, false)", [BASEBACKUP_NAME])
            backup_label = None
            backup_mode = "non-exclusive"
        else:
            # On older versions, first check if we're in recovery, and find out the version of a possibly
            # installed pgespresso extension.  We use pgespresso's backup control functions when they're
            # available, and require them in case we're running on a replica.  We also make sure the
            # extension version is 1.2 or newer to prevent crashing when using tablespaces.
            cursor.execute("SELECT pg_is_in_recovery(), "
                           " (SELECT extversion FROM pg_extension WHERE extname = 'pgespresso')")
            in_recovery, pgespresso_version = cursor.fetchone()
            if in_recovery and (not pgespresso_version or pgespresso_version < "1.2"):
                raise errors.InvalidConfigurationError("pgespresso version 1.2 or higher must be installed "
                                                       "to take `local-tar` backups from a replica")

            if pgespresso_version and pgespresso_version >= "1.2":
                cursor.execute("SELECT pgespresso_start_backup(%s, false)", [BASEBACKUP_NAME])
                backup_label = cursor.fetchone()[0]
                backup_mode = "pgespresso"
            else:
                cursor.execute("SELECT pg_start_backup(%s)", [BASEBACKUP_NAME])
                with open(os.path.join(pgdata, "backup_label"), "r") as fp:
                    backup_label = fp.read()
                backup_mode = "legacy"

        backup_stopped = False
        try:
            # Look up tablespaces and resolve their current filesystem locations
            cursor.execute("SELECT oid, spcname FROM pg_tablespace WHERE spcname NOT IN ('pg_default', 'pg_global')")
            tablespaces = {
                spcname: {
                    "path": os.readlink(os.path.join(pgdata, "pg_tblspc", str(oid))),
                    "oid": oid,
                }
                for oid, spcname in cursor.fetchall()
            }
            db_conn.commit()

            self.log.info("Starting to backup %r to %r", pgdata, compressed_basebackup)
            start_time = time.monotonic()
            with NamedTemporaryFile(dir=temp_basebackup_dir, prefix="data.", suffix=".tmp-compress") as raw_output_obj:
                with rohmufile.file_writer(fileobj=raw_output_obj,
                                           compression_algorithm=compression_algorithm,
                                           compression_level=compression_level,
                                           rsa_public_key=rsa_public_key) as output_obj:
                    with tarfile.TarFile(fileobj=output_obj, mode="w") as output_tar:
                        self.write_init_entries_to_tar(pgdata=pgdata, tablespaces=tablespaces, tar=output_tar)
                        files = self.find_files_to_backup(pgdata=pgdata, tablespaces=tablespaces)  # NOTE: generator
                        self.write_files_to_tar(files=files, tar=output_tar)
                        self.write_pg_control_to_tar(pgdata=pgdata, tar=output_tar)

                        # Call the stop backup functions now to get backup label for 9.6+ non-exclusive backups
                        if backup_mode == "non-exclusive":
                            cursor.execute("SELECT labelfile FROM pg_stop_backup(false)")
                            backup_label = cursor.fetchone()[0]
                        elif backup_mode == "pgespresso":
                            cursor.execute("SELECT pgespresso_stop_backup(%s)", [backup_label])
                        else:
                            cursor.execute("SELECT pg_stop_backup()")
                        db_conn.commit()
                        backup_stopped = True

                        backup_label_data = backup_label.encode("utf-8")
                        self.write_backup_label_to_tar(tar=output_tar, backup_label=backup_label_data)
                    input_size = output_obj.tell()

                os.link(raw_output_obj.name, compressed_basebackup)
                result_size = raw_output_obj.tell()

            rohmufile.log_compression_result(
                elapsed=time.monotonic() - start_time,
                encrypted=bool(rsa_public_key),
                log_func=self.log.info,
                original_size=input_size,
                result_size=result_size,
                source_name=pgdata,
            )
        finally:
            db_conn.rollback()
            if not backup_stopped:
                if backup_mode == "non-exclusive":
                    cursor.execute("SELECT pg_stop_backup(false)")
                elif backup_mode == "pgespresso":
                    cursor.execute("SELECT pgespresso_stop_backup(%s)", [backup_label])
                else:
                    cursor.execute("SELECT pg_stop_backup()")
            db_conn.commit()

    start_wal_segment, backup_start_time = self.parse_backup_label(backup_label_data)
    metadata = {
        "compression-algorithm": compression_algorithm,
        "encryption-key-id": encryption_key_id,
        "format": "pghoard-bb-v1",
        "original-file-size": input_size,
        "pg-version": self.pg_version_server,
        "start-time": backup_start_time,
        "start-wal-segment": start_wal_segment,
    }
    for spcname, spcinfo in tablespaces.items():
        metadata["tablespace-name-{}".format(spcinfo["oid"])] = spcname
        metadata["tablespace-path-{}".format(spcinfo["oid"])] = spcinfo["path"]
    self.transfer_queue.put({
        "callback_queue": self.callback_queue,
        "file_size": result_size,
        "filetype": "basebackup",
        "local_path": compressed_basebackup,
        "metadata": metadata,
        "site": self.site,
        "type": "UPLOAD",
    })
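# The start-time/start-wal-segment metadata above comes from parse_backup_label().
# A minimal sketch of what it does, assuming the standard backup_label layout
# written by pg_start_backup(); the real helper may additionally normalize the
# timestamp to ISO 8601:
import re


def parse_backup_label(backup_label_data):
    if isinstance(backup_label_data, bytes):
        backup_label_data = backup_label_data.decode("utf-8")
    # backup_label begins with lines such as:
    #   START WAL LOCATION: 0/2000028 (file 000000010000000000000002)
    #   START TIME: 2022-01-01 00:00:00 UTC
    start_wal_segment = re.search(r"^START WAL LOCATION: .* \(file ([0-9A-F]+)\)$",
                                  backup_label_data, re.MULTILINE).group(1)
    start_time = re.search(r"^START TIME: (.*)$", backup_label_data, re.MULTILINE).group(1)
    return start_wal_segment, start_time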