def delete_remote_basebackup(self, site, basebackup, metadata):
    """Remove a basebackup and its data files from the site's remote storage.

    For "pghoard-bb-v2" backups the main object is a metadata blob; it is
    fetched and decoded first so the chunk objects it references can be
    deleted as well.  Objects already missing from storage are logged and
    skipped; other per-object failures are reported but do not abort the
    remaining deletions.
    """
    started = time.monotonic()
    transfer = self.site_transfers.get(site)
    site_prefix = self.config["backup_sites"][site]["prefix"]
    keys_to_delete = [os.path.join(site_prefix, "basebackup", basebackup)]
    if metadata.get("format") == "pghoard-bb-v2":
        # The v2 main object only describes the backup; the payload lives in
        # separate chunk objects listed in its (compressed/encrypted) metadata.
        raw_meta = transfer.get_contents_to_string(keys_to_delete[0])[0]
        with rohmufile.file_reader(fileobj=io.BytesIO(raw_meta), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            chunk_meta = extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("PGHoard chunk metadata: %r", chunk_meta)
        keys_to_delete.extend(
            os.path.join(site_prefix, "basebackup_chunk", chunk["chunk_filename"])
            for chunk in chunk_meta["chunks"]
        )
    self.log.debug("Deleting basebackup datafiles: %r", ', '.join(keys_to_delete))
    for key in keys_to_delete:
        try:
            transfer.delete_key(key)
        except FileNotFoundFromStorageError:
            self.log.info("Tried to delete non-existent basebackup %r", key)
        except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
            self.log.exception("Problem deleting: %r", key)
            self.metrics.unexpected_exception(ex, where="delete_remote_basebackup")
    self.log.info("Deleted basebackup datafiles: %r, took: %.2fs",
                  ', '.join(keys_to_delete), time.monotonic() - started)
def _list_existing_files(self):
    """Iterate through all manifest files and fetch information about hash files"""
    hexdigest_map: Dict[str, SnapshotFile] = {}
    for backup in self.get_remote_basebackups_info(self.site):
        backup_metadata = backup["metadata"]
        # Only delta-format backups carry a manifest listing hash files
        if backup_metadata.get("format") != BaseBackupFormat.delta_v1:
            continue
        manifest_key = os.path.join(self.site_config["prefix"], "basebackup", backup["name"])
        compressed_meta = self.storage.get_contents_to_string(manifest_key)[0]
        with rohmufile.file_reader(
                fileobj=io.BytesIO(compressed_meta),
                metadata=backup_metadata,
                key_lookup=lambda key_id: self.site_config["encryption_keys"][key_id]["private"]) as input_obj:
            meta = extract_pghoard_delta_v1_metadata(input_obj)
        # Index every hash-addressed file from the snapshot manifest by its hexdigest
        for raw_file in meta["manifest"]["snapshot_result"]["state"]["files"]:
            snapshot_file = SnapshotFile.parse_obj(raw_file)
            if snapshot_file.hexdigest:
                hexdigest_map[snapshot_file.hexdigest] = snapshot_file
    return hexdigest_map
def delete_remote_basebackup(self, site, basebackup, metadata):
    """Delete a remote basebackup together with any chunk objects it references."""
    start = time.monotonic()
    storage = self.site_transfers.get(site)

    def site_path(*parts):
        # Every object key for this site lives under the configured prefix
        return os.path.join(self.config["backup_sites"][site]["prefix"], *parts)

    main_key = site_path("basebackup", basebackup)
    data_file_keys = [main_key]
    if metadata.get("format") == "pghoard-bb-v2":
        # A v2 backup's payload is stored as separate chunk objects; their names
        # come from the (compressed, possibly encrypted) metadata blob that the
        # main backup object contains.
        compressed_meta = storage.get_contents_to_string(main_key)[0]
        with rohmufile.file_reader(fileobj=io.BytesIO(compressed_meta), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            backup_meta = extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("PGHoard chunk metadata: %r", backup_meta)
        for chunk in backup_meta["chunks"]:
            data_file_keys.append(site_path("basebackup_chunk", chunk["chunk_filename"]))

    self.log.debug("Deleting basebackup datafiles: %r", ', '.join(data_file_keys))
    for data_file_key in data_file_keys:
        try:
            storage.delete_key(data_file_key)
        except FileNotFoundFromStorageError:
            # Already gone - nothing to do, but worth noting
            self.log.info("Tried to delete non-existent basebackup %r", data_file_key)
        except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
            self.log.exception("Problem deleting: %r", data_file_key)
            self.metrics.unexpected_exception(ex, where="delete_remote_basebackup")
    self.log.info("Deleted basebackup datafiles: %r, took: %.2fs",
                  ', '.join(data_file_keys), time.monotonic() - start)
def extract_one_backup(self, *, obj, metadata, pgdata, site, tablespaces):
    """Decrypt/decompress one downloaded basebackup object and extract it under pgdata.

    The file object is consumed and closed.  Raises RestoreError for an
    unknown "format" metadata value.
    """
    file_format = metadata.get("format")
    with obj:
        with rohmufile.file_reader(fileobj=obj, metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            if file_format in ("pghoard-bb-v1", "pghoard-bb-v2"):
                self._extract_pghoard_bb_v1_v2(input_obj, pgdata, tablespaces)
            elif not file_format:
                # No format marker: treat the object as a plain basebackup stream
                self._extract_basic(input_obj, pgdata)
            else:
                raise RestoreError("Unrecognized basebackup format {!r}".format(file_format))
    self.log.info("Extracted %r %r", obj, metadata)
def _get_delta_basebackup_files(self, site, storage, metadata, basebackup_name_to_delete, backups_to_keep):
    """Return storage keys of delta hexdigest objects that become unreferenced
    once ``basebackup_name_to_delete`` is removed.

    Reads the manifest of the backup being deleted and of every backup in
    ``backups_to_keep``; a hexdigest object is returned only when no kept
    backup still references it.

    :param site: backup site name
    :param storage: transfer/storage client used to fetch the manifests
    :param metadata: object-store metadata of the backup being deleted,
        used to decode the manifest blobs
    :param basebackup_name_to_delete: name of the backup being removed
    :param backups_to_keep: list of backup dicts (with a "name" key) that remain
    :return: list of object keys under ``basebackup_delta`` that can be deleted
    """
    # NOTE(review): the *deleted* backup's metadata is used to decode every
    # manifest, including the kept backups' ones - this only works if all
    # backups of the site share compression/encryption settings; confirm.
    all_hexdigests = set()
    keep_hexdigests = set()
    names = [basebackup_name_to_delete] + [backup["name"] for backup in backups_to_keep]
    for backup_name in names:
        delta_backup_key = os.path.join(self._get_site_prefix(site), "basebackup", backup_name)
        bmeta_compressed = storage.get_contents_to_string(delta_backup_key)[0]
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed),
                                   metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            meta = extract_pghoard_delta_v1_metadata(input_obj)
        files = meta["manifest"]["snapshot_result"]["state"]["files"]
        # Set comprehension instead of set(generator); empty hexdigests are
        # embedded files with no separate storage object
        backup_hexdigests = {delta_file["hexdigest"] for delta_file in files if delta_file["hexdigest"]}
        all_hexdigests |= backup_hexdigests
        if backup_name != basebackup_name_to_delete:
            # Keep data file in case if there is still a reference from other backups
            keep_hexdigests |= backup_hexdigests

    # Remove unreferenced files (plain set difference - no copy needed)
    extra_hexdigests = all_hexdigests - keep_hexdigests
    return [
        os.path.join(self._get_site_prefix(site), "basebackup_delta", hexdigest)
        for hexdigest in extra_hexdigests
    ]
def _get_delta_basebackup_data(self, site, metadata, basebackup_name):
    """Fetch and decode a delta basebackup manifest.

    Returns a ``(tablespaces, data_files, empty_dirs)`` tuple describing
    everything the fetcher must restore: hash-addressed delta objects,
    small files embedded base64-encoded in the manifest, and the manifest
    blob itself.
    """
    raw_manifest = self.storage.get_file_bytes(basebackup_name)
    with rohmufile.file_reader(fileobj=io.BytesIO(raw_manifest), metadata=metadata,
                               key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
        bmeta = common.extract_pghoard_delta_v1_metadata(input_obj)
    self.log.debug("Delta backup metadata: %r", bmeta)

    delta_objects_path = os.path.join(self._get_site_prefix(site), "basebackup_delta")
    snapshot_state = bmeta["manifest"]["snapshot_result"]["state"]
    tablespaces = bmeta["tablespaces"]
    empty_dirs = snapshot_state["empty_dirs"]

    data_files = []
    for delta_file in snapshot_state["files"]:
        if delta_file["hexdigest"]:
            # Content lives in a separate hexdigest-addressed storage object
            data_files.append(
                FilePathInfo(name=os.path.join(delta_objects_path, delta_file["hexdigest"]),
                             size=delta_file["stored_file_size"],
                             new_name=delta_file["relative_path"],
                             file_type=FileInfoType.delta))
        elif delta_file["content_b64"] is not None:
            # Restore embed files
            data_files.append(
                FileDataInfo(data=base64.b64decode(delta_file["content_b64"]),
                             metadata=metadata,
                             size=delta_file["file_size"],
                             new_name=delta_file["relative_path"],
                             file_type=FileInfoType.delta))
    # The (still compressed/encrypted) manifest blob itself is restored too
    data_files.append(FileDataInfo(data=raw_manifest, metadata=metadata, size=0))
    return tablespaces, data_files, empty_dirs
def _get_basebackup(self, pgdata, basebackup, site,
                    primary_conninfo=None,
                    recovery_end_command=None,
                    recovery_target_action=None,
                    recovery_target_name=None,
                    recovery_target_time=None,
                    recovery_target_xid=None,
                    restore_to_master=None,
                    overwrite=False,
                    tablespace_mapping=None):
    """Download a basebackup, extract it into *pgdata* and write recovery configuration.

    The backup is selected by *basebackup* name, by "latest", or by
    *recovery_target_time* (mutually exclusive with the other recovery
    targets).  Target directories for $PGDATA and all tablespaces are
    validated (must be absent, empty, or --overwrite given) before any
    data is written.  Raises RestoreError on any validation or lookup
    failure.
    """
    # At most one recovery target may be given
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")

    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup)

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    # Tablespace names/paths are stored as object metadata keys like
    # "tablespace-name-<oid>" / "tablespace-path-<oid>"
    tablespaces = {}
    tsmetare = re.compile("^tablespace-name-([0-9]+)$")
    for kw, value in metadata.items():
        match = tsmetare.match(kw)
        if not match:
            continue
        tsoid = match.group(1)
        tsname = value
        # User-supplied mapping overrides the path recorded in the backup
        tspath = tablespace_mapping.pop(tsname, metadata["tablespace-path-{}".format(tsoid)])
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        tablespaces[tsname] = {
            "oid": int(tsoid),
            "path": tspath,
        }
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    def download_progress(current_pos, expected_max, end=""):
        # Single-line console progress; \r keeps rewriting the same line
        print("\rDownload progress: {:.2%}".format(current_pos / expected_max), end=end)

    # Spool the backup to a temp file, then decrypt/decompress and extract it
    with tempfile.TemporaryFile(dir=self.config["backup_location"], prefix="basebackup.", suffix=".pghoard") as tmp:
        self.storage.get_basebackup_file_to_fileobj(basebackup, tmp, progress_callback=download_progress)
        download_progress(1, 1, end="\n")
        tmp.seek(0)
        with rohmufile.file_reader(fileobj=tmp, metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            if metadata.get("format") == "pghoard-bb-v1":
                self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
            else:
                self._extract_basic(input_obj, pgdata)

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site,
                    primary_conninfo=None,
                    recovery_end_command=None,
                    recovery_target_action=None,
                    recovery_target_name=None,
                    recovery_target_time=None,
                    recovery_target_xid=None,
                    restore_to_master=None,
                    overwrite=False,
                    tablespace_mapping=None):
    """Download a basebackup, extract it into *pgdata* and write recovery configuration.

    The backup is selected by *basebackup* name, by "latest", or by
    *recovery_target_time* (mutually exclusive with the other recovery
    targets).  Target directories for $PGDATA and all tablespaces are
    validated (absent, empty, or --overwrite given) before anything is
    written.  Raises RestoreError on any validation or lookup failure.
    """
    # At most one recovery target may be given
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dateutil.parser.parse(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup)

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    # Tablespace names/paths are stored as object metadata keys like
    # "tablespace-name-<oid>" / "tablespace-path-<oid>"
    tablespaces = {}
    tsmetare = re.compile("^tablespace-name-([0-9]+)$")
    for kw, value in metadata.items():
        match = tsmetare.match(kw)
        if not match:
            continue
        tsoid = match.group(1)
        tsname = value
        # User-supplied mapping overrides the path recorded in the backup
        tspath = tablespace_mapping.pop(tsname, metadata["tablespace-path-{}".format(tsoid)])
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        tablespaces[tsname] = {
            "oid": int(tsoid),
            "path": tspath,
        }
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    # Spool the backup to a temp file, then decrypt/decompress and extract it
    with tempfile.TemporaryFile(dir=self.config["backup_location"], prefix="basebackup.", suffix=".pghoard") as tmp:
        self.storage.get_basebackup_file_to_fileobj(basebackup, tmp)
        tmp.seek(0)
        with rohmufile.file_reader(fileobj=tmp, metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            if metadata.get("format") == "pghoard-bb-v1":
                self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
            else:
                self._extract_basic(input_obj, pgdata)

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site,
                    debug=False,
                    status_output_file=None,
                    primary_conninfo=None,
                    recovery_end_command=None,
                    recovery_target_action=None,
                    recovery_target_name=None,
                    recovery_target_time=None,
                    recovery_target_xid=None,
                    restore_to_master=None,
                    overwrite=False,
                    tablespace_mapping=None,
                    tablespace_base_dir=None):
    """Restore a basebackup into *pgdata* via BasebackupFetcher and write recovery config.

    Resolves *basebackup* ("latest", a name, or via *recovery_target_time*)
    to a backup entry dict, builds the list of data files to fetch
    (chunked for "pghoard-bb-v2", a single object otherwise), validates
    the $PGDATA and tablespace target directories, then delegates the
    download/extraction to BasebackupFetcher.  Raises RestoreError on any
    validation or lookup failure.
    """
    # At most one recovery target may be given
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    elif isinstance(basebackup, str):
        # Explicit backup name: resolve to the full backup entry dict
        basebackup = self._find_basebackup_for_name(basebackup)

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup["name"])
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

        tablespaces = bmeta["tablespaces"]
        # Each entry is [storage_key, size]; size feeds progress reporting
        basebackup_data_files = [[
            os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup_chunk", chunk["chunk_filename"]),
            chunk["result_size"],
        ] for chunk in bmeta["chunks"]]
        # We need the files from the main basebackup file too
        basebackup_data_files.append([(bmeta_compressed, metadata), 0])

    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }

        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
        # we just care that the dir exists, but we're OK if there are other objects there
        raise RestoreError("Tablespace base directory {!r} does not exist, aborting."
                           .format(tablespace_base_dir))

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
        if tablespace_base_dir and not os.path.exists(tspath):
            # Fall back to <tablespace_base_dir>/<oid> when the recorded path is absent
            tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
            os.makedirs(tspath, exist_ok=True)
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        tsinfo["path"] = tspath
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    # Downloading and extraction are delegated to the fetcher
    fetcher = BasebackupFetcher(
        app_config=self.config,
        data_files=basebackup_data_files,
        status_output_file=status_output_file,
        debug=debug,
        pgdata=pgdata,
        site=site,
        tablespaces=tablespaces,
    )
    fetcher.fetch_all()

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site,
                    primary_conninfo=None,
                    recovery_end_command=None,
                    recovery_target_action=None,
                    recovery_target_name=None,
                    recovery_target_time=None,
                    recovery_target_xid=None,
                    restore_to_master=None,
                    overwrite=False,
                    tablespace_mapping=None):
    """Restore a basebackup into *pgdata* using parallel download and extraction.

    Resolves the backup to fetch, validates target directories, then
    downloads chunks and extracts them concurrently via two thread pools
    (download pool sized by the "transfer" thread_count, extraction pool
    by the "compression" thread_count).  Raises RestoreError on any
    validation failure or if any chunk download/extraction fails.
    """
    # At most one recovery target may be given
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup)
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup)
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

        tablespaces = bmeta["tablespaces"]
        # Each entry is [storage_key, size]; size feeds download progress
        basebackup_data_files = [
            [
                os.path.join(self.config["path_prefix"], site, "basebackup_chunk", chunk["chunk_filename"]),
                chunk["result_size"],
            ]
            for chunk in bmeta["chunks"]
        ]
        # We need the files from the main basebackup file too; a tuple entry
        # marks already-downloaded data that only needs extraction
        basebackup_data_files.append([(io.BytesIO(bmeta_compressed), metadata), 0])

    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }

        # -1 size: unknown, progress reporting will be approximate
        basebackup_data_files = [[basebackup, -1]]

    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup, -1]]

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        tsinfo["path"] = tspath
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    total_download_size = sum(item[1] for item in basebackup_data_files)
    # Single-element list so the nested closures can mutate the timestamp
    progress_report_time = [0]
    download_errors = 0
    extract_errors = 0

    with futures.ThreadPoolExecutor(max_workers=self.config["compression"]["thread_count"]) as extract_executor:
        extract_jobs = []
        with futures.ThreadPoolExecutor(max_workers=self.config["transfer"]["thread_count"]) as download_executor:
            download_jobs = []
            # Per-file progress in bytes; tuple entries (in-memory data) are excluded
            download_progress_per_file = {
                basebackup_data_file: 0
                for basebackup_data_file, _ in basebackup_data_files
                if not isinstance(basebackup_data_file, tuple)
            }

            def download_progress(end=""):
                # report max once per second
                if time.monotonic() - progress_report_time[0] < 1:
                    return
                progress_report_time[0] = time.monotonic()
                total_downloaded = sum(download_progress_per_file.values())
                if total_download_size <= 0:
                    progress = 0
                else:
                    progress = total_downloaded / total_download_size
                print("\rDownload progress: {progress:.2%} ({dl_mib:.0f} / {total_mib:.0f} MiB)\r".format(
                    progress=progress,
                    dl_mib=total_downloaded / (1024 ** 2),
                    total_mib=total_download_size / (1024 ** 2),
                ), end=end)

            for basebackup_data_file, backup_data_file_size in basebackup_data_files:
                if isinstance(basebackup_data_file, tuple):
                    # Already have the data in memory - go straight to extraction
                    tmp_obj, tmp_metadata = basebackup_data_file
                    extract_jobs.append(extract_executor.submit(
                        self.extract_one_backup,
                        obj=tmp_obj,
                        metadata=tmp_metadata,
                        pgdata=pgdata,
                        site=site,
                        tablespaces=tablespaces,
                    ))
                    continue

                # Default args bind the current loop values (avoids the
                # late-binding closure pitfall)
                def single_download_progress(current_pos, expected_max,
                                             this_file_name=basebackup_data_file,
                                             this_file_size=backup_data_file_size):
                    download_progress_per_file[this_file_name] = this_file_size * (current_pos / expected_max)
                    download_progress()

                # NOTE: Most of the transfer clients aren't thread-safe, so initialize a new transfer
                # client for each download. We could use thread local storage or pooling here, but
                # probably not worth the trouble for this use case.
                transfer = get_transfer(common.get_object_storage_config(self.config, site))
                download_jobs.append(download_executor.submit(
                    self.download_one_backup,
                    basebackup_data_file=basebackup_data_file,
                    progress_callback=single_download_progress,
                    site=site,
                    transfer=transfer,
                ))

            # Hand each completed download to the extraction pool; count failures
            for future in futures.as_completed(download_jobs):
                if future.exception():
                    self.log.error("Got error from chunk download: %s", future.exception())
                    download_errors += 1
                    continue
                tmp_obj, tmp_metadata = future.result()
                extract_jobs.append(extract_executor.submit(
                    self.extract_one_backup,
                    obj=tmp_obj,
                    metadata=tmp_metadata,
                    pgdata=pgdata,
                    site=site,
                    tablespaces=tablespaces,
                ))

            # Force a final progress line regardless of the 1s throttle
            progress_report_time[0] = 0
            download_progress(end="\n")

        for future in futures.as_completed(extract_jobs):
            if future.exception():
                self.log.error("Got error from chunk extraction: %s", future.exception())
                extract_errors += 1
                continue

    if download_errors:
        raise RestoreError("Backup download failed with {} errors".format(download_errors))
    if extract_errors:
        raise RestoreError("Backup extraction failed with {} errors".format(extract_errors))

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site, debug=False, status_output_file=None,
                    primary_conninfo=None, recovery_end_command=None, recovery_target_action=None,
                    recovery_target_name=None, recovery_target_time=None, recovery_target_xid=None,
                    restore_to_master=None, overwrite=False, tablespace_mapping=None,
                    tablespace_base_dir=None):
    """Restore a basebackup into *pgdata* and write recovery configuration for it.

    Resolves *basebackup* (a backup dict, a backup name, or "latest" / a recovery
    target time), validates and prepares the target directories (including any
    tablespace directories), downloads and extracts the backup files via
    BasebackupFetcher, and finally writes the recovery configuration.

    :param pgdata: target $PGDATA directory; must be missing, empty, or --overwrite'd
    :param basebackup: backup info dict, backup name string, or "latest"
    :param site: backup site name used to look up config, prefix and encryption keys
    :param debug / status_output_file: passed through to BasebackupFetcher
    :param primary_conninfo, recovery_end_command, recovery_target_*, restore_to_master:
        passed through to create_recovery_conf
    :param overwrite: wipe and recreate an existing, non-empty pgdata
    :param tablespace_mapping: optional {tablespace_name: target_path} overrides;
        every entry must match a tablespace present in the backup
    :param tablespace_base_dir: fallback directory under which missing tablespace
        target paths are created (named by tablespace oid)
    :raises RestoreError: on conflicting recovery targets, bad target time, unusable
        target directories, or unknown tablespace mappings
    """
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")

    # Work on a copy so we never mutate the caller's dict (entries are .pop()ed
    # below); this also fixes an AttributeError when the default None was used
    # while the backup actually contains tablespaces.
    tablespace_mapping = dict(tablespace_mapping) if tablespace_mapping else {}

    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex)) from ex
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    elif isinstance(basebackup, str):
        basebackup = self._find_basebackup_for_name(basebackup)

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup["name"])
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("Backup metadata: %r", bmeta)

        tablespaces = bmeta["tablespaces"]
        basebackup_data_files = [
            [
                os.path.join(
                    self.config["backup_sites"][site]["prefix"],
                    "basebackup_chunk",
                    chunk["chunk_filename"],
                ),
                chunk["result_size"],
            ]
            for chunk in bmeta["chunks"]
        ]
        # We need the files from the main basebackup file too; size 0 because the
        # bytes are already in memory and need no download progress accounting.
        basebackup_data_files.append([(bmeta_compressed, metadata), 0])
    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]
    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
        # we just care that the dir exists, but we're OK if there are other objects there
        raise RestoreError("Tablespace base directory {!r} does not exist, aborting."
                           .format(tablespace_base_dir))

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])

        if tablespace_base_dir and not os.path.exists(tspath):
            tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
            os.makedirs(tspath, exist_ok=True)

        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))

        tsinfo["path"] = tspath
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError as ex:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname)) from ex

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)  # PostgreSQL requires restrictive permissions on $PGDATA

    fetcher = BasebackupFetcher(
        app_config=self.config,
        data_files=basebackup_data_files,
        status_output_file=status_output_file,
        debug=debug,
        pgdata=pgdata,
        site=site,
        tablespaces=tablespaces,
    )
    fetcher.fetch_all()

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")