예제 #1
0
    def delete_remote_basebackup(self, site, basebackup, metadata):
        """Delete a basebackup object belonging to *site* from remote storage.

        For "pghoard-bb-v2" format backups the main object is only a metadata
        blob; the chunk objects it references are collected and deleted too.
        Missing objects are logged and skipped so deletion stays best-effort.
        """
        started = time.monotonic()
        storage = self.site_transfers.get(site)
        site_prefix = self.config["backup_sites"][site]["prefix"]
        main_backup_key = os.path.join(site_prefix, "basebackup", basebackup)
        keys_to_delete = [main_backup_key]

        if metadata.get("format") == "pghoard-bb-v2":
            # The main object holds metadata; decode it to find the chunk
            # objects that make up the actual backup data.
            bmeta_compressed = storage.get_contents_to_string(main_backup_key)[0]
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = extract_pghoard_bb_v2_metadata(input_obj)
                self.log.debug("PGHoard chunk metadata: %r", bmeta)
                keys_to_delete.extend(
                    os.path.join(site_prefix, "basebackup_chunk", chunk["chunk_filename"])
                    for chunk in bmeta["chunks"]
                )

        self.log.debug("Deleting basebackup datafiles: %r", ', '.join(keys_to_delete))
        for obj_key in keys_to_delete:
            try:
                storage.delete_key(obj_key)
            except FileNotFoundFromStorageError:
                self.log.info("Tried to delete non-existent basebackup %r", obj_key)
            except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
                self.log.exception("Problem deleting: %r", obj_key)
                self.metrics.unexpected_exception(ex, where="delete_remote_basebackup")
        self.log.info("Deleted basebackup datafiles: %r, took: %.2fs",
                      ', '.join(keys_to_delete), time.monotonic() - started)
예제 #2
0
    def _list_existing_files(self):
        """Iterate through all manifest files and fetch information about hash files"""
        known_files: Dict[str, SnapshotFile] = {}
        for backup in self.get_remote_basebackups_info(self.site):
            # Only delta (v1) format backups carry a manifest with hash files
            if backup["metadata"].get("format") != BaseBackupFormat.delta_v1:
                continue

            manifest_key = os.path.join(self.site_config["prefix"], "basebackup", backup["name"])
            raw_manifest = self.storage.get_contents_to_string(manifest_key)[0]

            def lookup_private_key(key_id):
                return self.site_config["encryption_keys"][key_id]["private"]

            with rohmufile.file_reader(fileobj=io.BytesIO(raw_manifest),
                                       metadata=backup["metadata"],
                                       key_lookup=lookup_private_key) as input_obj:
                meta = extract_pghoard_delta_v1_metadata(input_obj)

            for delta_file in meta["manifest"]["snapshot_result"]["state"]["files"]:
                snapshot_file = SnapshotFile.parse_obj(delta_file)
                # Embedded files have no hexdigest; only hash-addressed files are indexed
                if snapshot_file.hexdigest:
                    known_files[snapshot_file.hexdigest] = snapshot_file

        return known_files
예제 #3
0
File: pghoard.py  Project: ohmu/pghoard
    def delete_remote_basebackup(self, site, basebackup, metadata):
        """Remove a basebackup, and any chunk objects it references, from
        the remote object storage configured for *site*.

        Deletion is best-effort: objects missing from storage are logged and
        skipped, unexpected errors are logged and counted as metrics.
        """
        delete_started = time.monotonic()
        transfer = self.site_transfers.get(site)
        prefix = self.config["backup_sites"][site]["prefix"]
        main_backup_key = os.path.join(prefix, "basebackup", basebackup)
        data_file_keys = [main_backup_key]

        if metadata.get("format") == "pghoard-bb-v2":
            # v2 backups store chunk references inside the (compressed,
            # possibly encrypted) main object; decode it to collect the keys.
            bmeta_compressed = transfer.get_contents_to_string(main_backup_key)[0]
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = extract_pghoard_bb_v2_metadata(input_obj)
                self.log.debug("PGHoard chunk metadata: %r", bmeta)
                for chunk in bmeta["chunks"]:
                    data_file_keys.append(
                        os.path.join(prefix, "basebackup_chunk", chunk["chunk_filename"]))

        self.log.debug("Deleting basebackup datafiles: %r", ', '.join(data_file_keys))
        for obj_key in data_file_keys:
            try:
                transfer.delete_key(obj_key)
            except FileNotFoundFromStorageError:
                # Already gone - treat deletion as idempotent, just note it
                self.log.info("Tried to delete non-existent basebackup %r", obj_key)
            except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
                self.log.exception("Problem deleting: %r", obj_key)
                self.metrics.unexpected_exception(ex, where="delete_remote_basebackup")
        self.log.info("Deleted basebackup datafiles: %r, took: %.2fs",
                      ', '.join(data_file_keys), time.monotonic() - delete_started)
예제 #4
0
 def extract_one_backup(self, *, obj, metadata, pgdata, site, tablespaces):
     """Extract a single downloaded basebackup object into *pgdata*.

     ``obj`` is a file-like object (closed on exit of the ``with`` block).
     The payload is decrypted/decompressed via rohmufile using the site's
     key lookup, then extracted according to the "format" metadata entry:
     "pghoard-bb-v1"/"pghoard-bb-v2" use the tablespace-aware extractor,
     a missing format marker uses the basic extractor, and any other value
     raises RestoreError.
     """
     with obj:
         with rohmufile.file_reader(fileobj=obj, metadata=metadata,
                                    key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
             if metadata.get("format") in ("pghoard-bb-v1", "pghoard-bb-v2"):
                 self._extract_pghoard_bb_v1_v2(input_obj, pgdata, tablespaces)
             elif not metadata.get("format"):
                 # No format marker at all: plain (non-chunked) backup stream
                 self._extract_basic(input_obj, pgdata)
             else:
                 raise RestoreError("Unrecognized basebackup format {!r}".format(metadata.get("format")))
     self.log.info("Extracted %r %r", obj, metadata)
예제 #5
0
    def _get_delta_basebackup_files(self, site, storage, metadata,
                                    basebackup_name_to_delete,
                                    backups_to_keep):
        """Return storage keys of delta data files that become unreferenced
        once *basebackup_name_to_delete* is removed.

        Reads the delta manifest of the backup being deleted and of every
        backup that is kept; a hexdigest-addressed data file may only be
        deleted when no kept backup still references it.
        """
        all_hexdigests = set()
        keep_hexdigests = set()

        backup_names = [basebackup_name_to_delete] + [back["name"] for back in backups_to_keep]
        for backup_name in backup_names:
            delta_backup_key = os.path.join(self._get_site_prefix(site),
                                            "basebackup", backup_name)
            bmeta_compressed = storage.get_contents_to_string(delta_backup_key)[0]
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed),
                                       metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(
                                           self.config, site)) as input_obj:
                meta = extract_pghoard_delta_v1_metadata(input_obj)

            files = meta["manifest"]["snapshot_result"]["state"]["files"]
            # Embedded files carry an empty hexdigest; only hash-addressed
            # files live as separate objects and are candidates for deletion.
            backup_hexdigests = {delta_file["hexdigest"]
                                 for delta_file in files
                                 if delta_file["hexdigest"]}
            all_hexdigests |= backup_hexdigests

            if backup_name != basebackup_name_to_delete:
                # Keep data file in case if there is still a reference from other backups
                keep_hexdigests |= backup_hexdigests

        # Remove unreferenced files
        extra_hexdigests = all_hexdigests - keep_hexdigests
        return [
            os.path.join(self._get_site_prefix(site), "basebackup_delta", hexdigest)
            for hexdigest in extra_hexdigests
        ]
예제 #6
0
    def _get_delta_basebackup_data(self, site, metadata, basebackup_name):
        """Fetch and decode a delta backup manifest.

        Returns a 3-tuple ``(tablespaces, basebackup_data_files, empty_dirs)``
        where the data file list contains FilePathInfo entries for
        hash-addressed delta objects, FileDataInfo entries for embedded
        (base64) file contents, and finally the manifest object itself.
        """
        bmeta_compressed = self.storage.get_file_bytes(basebackup_name)
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed),
                                   metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_delta_v1_metadata(input_obj)
        self.log.debug("Delta backup metadata: %r", bmeta)

        delta_objects_path = os.path.join(self._get_site_prefix(site), "basebackup_delta")

        backup_state = bmeta["manifest"]["snapshot_result"]["state"]
        empty_dirs = backup_state["empty_dirs"]
        tablespaces = bmeta["tablespaces"]

        basebackup_data_files = []
        for delta_file in backup_state["files"]:
            hexdigest = delta_file["hexdigest"]
            if hexdigest:
                # Hash-addressed file stored as a separate delta object
                basebackup_data_files.append(FilePathInfo(
                    name=os.path.join(delta_objects_path, hexdigest),
                    size=delta_file["stored_file_size"],
                    new_name=delta_file["relative_path"],
                    file_type=FileInfoType.delta,
                ))
            elif delta_file["content_b64"] is not None:
                # Restore embed files
                basebackup_data_files.append(FileDataInfo(
                    data=base64.b64decode(delta_file["content_b64"]),
                    metadata=metadata,
                    size=delta_file["file_size"],
                    new_name=delta_file["relative_path"],
                    file_type=FileInfoType.delta,
                ))

        # The manifest blob itself is part of the restore input as well
        basebackup_data_files.append(
            FileDataInfo(data=bmeta_compressed, metadata=metadata, size=0))

        return tablespaces, basebackup_data_files, empty_dirs
예제 #7
0
File: restore.py  Project: zvolsky/pghoard
    def _get_basebackup(self,
                        pgdata,
                        basebackup,
                        site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        """Download and extract a basebackup into *pgdata*, then write recovery configuration.

        The backup to restore is resolved from *basebackup*: the one nearest
        to ``recovery_target_time`` if given, the newest one for "latest",
        otherwise the named backup.  Target directories for $PGDATA and any
        tablespaces must be absent or empty unless *overwrite* is set.

        Raises RestoreError for conflicting recovery targets, unparseable
        target times, unusable target directories or entries left over in
        *tablespace_mapping*.
        """
        # Only one recovery target selector may be given at a time
        targets = [
            recovery_target_name, recovery_target_time, recovery_target_xid
        ]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(
                    recovery_target_time)
            except (TypeError, ValueError) as ex:
                # NOTE(review): "raise ... from ex" would preserve the original traceback
                raise RestoreError("recovery_target_time {!r}: {}".format(
                    recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError(
                "$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                .format(pgdata))

        # Tablespace info is stored in the object store metadata as
        # tablespace-name-<oid> / tablespace-path-<oid> key pairs
        tablespaces = {}
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            # A caller-provided mapping overrides the path recorded in the backup
            tspath = tablespace_mapping.pop(
                tsname, metadata["tablespace-path-{}".format(tsoid)])
            if not os.path.exists(tspath):
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} does not exist, aborting."
                    .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                    .format(tsname, tspath))

            print("Using existing empty directory {!r} for tablespace {!r}".
                  format(tspath, tsname))
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError(
                "Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError(
                    "{} target directory {!r} is empty, but not writable, aborting."
                    .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            os.chmod(dirname, 0o700)  # restrict permissions on freshly created dirs

        def download_progress(current_pos, expected_max, end=""):
            # Single-line progress display, overwritten in place via "\r"
            print("\rDownload progress: {:.2%}".format(current_pos /
                                                       expected_max),
                  end=end)

        # Download into a temporary spool file, then decrypt/decompress and extract
        with tempfile.TemporaryFile(dir=self.config["backup_location"],
                                    prefix="basebackup.",
                                    suffix=".pghoard") as tmp:
            self.storage.get_basebackup_file_to_fileobj(
                basebackup, tmp, progress_callback=download_progress)
            download_progress(1, 1, end="\n")
            tmp.seek(0)

            with rohmufile.file_reader(fileobj=tmp,
                                       metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(
                                           self.config, site)) as input_obj:
                if metadata.get("format") == "pghoard-bb-v1":
                    self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
                else:
                    # Any other (or missing) format: basic extraction
                    self._extract_basic(input_obj, pgdata)

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" %
              pgdata)
        print(
            "On systemd based systems you can run systemctl start postgresql")
        print(
            "On SYSV Init based systems you can run /etc/init.d/postgresql start"
        )
예제 #8
0
File: restore.py  Project: saaros/pghoard
    def _get_basebackup(self, pgdata, basebackup, site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        """Download and extract a basebackup into *pgdata*, then write recovery configuration.

        *basebackup* may be a backup name or "latest"; if
        ``recovery_target_time`` is given, the backup nearest to that time
        is restored instead.  $PGDATA and tablespace target directories
        must be absent or empty unless *overwrite* is set.

        Raises RestoreError for conflicting recovery targets, unparseable
        target times, unusable target directories or entries left over in
        *tablespace_mapping*.
        """
        # Only one recovery target selector may be given at a time
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dateutil.parser.parse(recovery_target_time)
            except (TypeError, ValueError) as ex:
                # NOTE(review): "raise ... from ex" would preserve the original traceback
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        # Tablespace info is stored in the object store metadata as
        # tablespace-name-<oid> / tablespace-path-<oid> key pairs
        tablespaces = {}
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            # A caller-provided mapping overrides the path recorded in the backup
            tspath = tablespace_mapping.pop(tsname, metadata["tablespace-path-{}".format(tsoid)])
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            os.chmod(dirname, 0o700)  # restrict permissions on freshly created dirs

        # Download into a temporary spool file, then decrypt/decompress and extract
        with tempfile.TemporaryFile(dir=self.config["backup_location"], prefix="basebackup.", suffix=".pghoard") as tmp:
            self.storage.get_basebackup_file_to_fileobj(basebackup, tmp)
            tmp.seek(0)

            with rohmufile.file_reader(fileobj=tmp, metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                if metadata.get("format") == "pghoard-bb-v1":
                    self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
                else:
                    # Any other (or missing) format: basic extraction
                    self._extract_basic(input_obj, pgdata)

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
예제 #9
0
    def _get_basebackup(self,
                        pgdata,
                        basebackup,
                        site,
                        debug=False,
                        status_output_file=None,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None,
                        tablespace_base_dir=None):
        """Download and extract a basebackup into *pgdata*, then write recovery configuration.

        Handles three storage layouts: "pghoard-bb-v2" (a metadata object
        pointing at separate chunk objects), "pghoard-bb-v1" (tablespace
        info carried in object store metadata) and raw single-object
        backups.  Download and extraction are delegated to
        BasebackupFetcher.

        Raises RestoreError for conflicting recovery targets, unparseable
        target times, unusable target directories or entries left over in
        *tablespace_mapping*.
        """
        # Only one recovery target selector may be given at a time
        targets = [
            recovery_target_name, recovery_target_time, recovery_target_xid
        ]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(
                    recovery_target_time)
            except (TypeError, ValueError) as ex:
                # NOTE(review): "raise ... from ex" would preserve the original traceback
                raise RestoreError("recovery_target_time {!r}: {}".format(
                    recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()
        elif isinstance(basebackup, str):
            basebackup = self._find_basebackup_for_name(basebackup)

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup["name"])
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError(
                "$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed),
                                       metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(
                                           self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            # Each entry pairs a chunk's storage key with its expected size
            basebackup_data_files = [[
                os.path.join(self.config["backup_sites"][site]["prefix"],
                             "basebackup_chunk", chunk["chunk_filename"]),
                chunk["result_size"],
            ] for chunk in bmeta["chunks"]]
            # We need the files from the main basebackup file too
            basebackup_data_files.append([(bmeta_compressed, metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        if tablespace_base_dir and not os.path.exists(
                tablespace_base_dir) and not overwrite:
            # we just care that the dir exists, but we're OK if there are other objects there
            raise RestoreError(
                "Tablespace base directory {!r} does not exist, aborting.".
                format(tablespace_base_dir))

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            # A caller-provided mapping overrides the path recorded in the backup
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if tablespace_base_dir and not os.path.exists(tspath):
                # Create an oid-named directory under the tablespace base dir
                tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
                os.makedirs(tspath, exist_ok=True)
            if not os.path.exists(tspath):
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} does not exist, aborting."
                    .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                    .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".
                  format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError(
                "Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError(
                    "{} target directory {!r} is empty, but not writable, aborting."
                    .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            os.chmod(dirname, 0o700)  # restrict permissions on freshly created dirs

        # Delegate download and extraction of all listed data files
        fetcher = BasebackupFetcher(
            app_config=self.config,
            data_files=basebackup_data_files,
            status_output_file=status_output_file,
            debug=debug,
            pgdata=pgdata,
            site=site,
            tablespaces=tablespaces,
        )
        fetcher.fetch_all()

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" %
              pgdata)
        print(
            "On systemd based systems you can run systemctl start postgresql")
        print(
            "On SYSV Init based systems you can run /etc/init.d/postgresql start"
        )
예제 #10
0
    def _get_basebackup(self, pgdata, basebackup, site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        """Download, extract and prepare a basebackup for recovery.

        Resolves which stored basebackup to use ("latest" or the one nearest
        to *recovery_target_time*), verifies that $PGDATA and all tablespace
        target directories are usable, downloads the backup data files in
        parallel while extracting completed downloads concurrently, and
        finally writes the recovery configuration into *pgdata*.

        At most one of recovery_target_name / recovery_target_time /
        recovery_target_xid may be given.

        Raises RestoreError on conflicting recovery targets, unusable target
        directories, unknown tablespace mappings, or download/extraction
        failures.

        NOTE(review): tablespace_mapping defaults to None but is .pop()'d
        unconditionally below whenever the backup contains tablespaces - that
        path would raise AttributeError; callers presumably always pass a
        dict. Verify against call sites.
        """
        # Refuse ambiguous recovery targets - only one may be specified.
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If the basebackup is given as a target time or "latest", figure out
        # which stored backup that actually is.
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup)
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            # Each entry is [storage_key, expected_size]; the size is used only
            # for download progress reporting.
            basebackup_data_files = [
                [
                    os.path.join(self.config["path_prefix"], site, "basebackup_chunk", chunk["chunk_filename"]),
                    chunk["result_size"],
                ]
                for chunk in bmeta["chunks"]
            ]
            # We need the files from the main basebackup file too.  It is
            # already in memory, so it is queued as a (fileobj, metadata) tuple
            # that skips the download phase and goes straight to extraction.
            basebackup_data_files.append([(io.BytesIO(bmeta_compressed), metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            # Single-object backup; size unknown (-1)
            basebackup_data_files = [[basebackup, -1]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup, -1]]

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            # restrict newly created directories to the owner only
            os.chmod(dirname, 0o700)

        # Expected total byte count for progress reporting; -1 (unknown size)
        # entries make the total meaningless, which the progress code tolerates
        # by reporting 0% when the total is <= 0.
        total_download_size = sum(item[1] for item in basebackup_data_files)
        # Single-element list so the nested download_progress() closure can
        # mutate the last-report timestamp without a nonlocal declaration.
        progress_report_time = [0]
        download_errors = 0
        extract_errors = 0

        # Two thread pools: completed downloads are fed into extraction jobs as
        # they finish, so download and extraction overlap.
        with futures.ThreadPoolExecutor(max_workers=self.config["compression"]["thread_count"]) as extract_executor:
            extract_jobs = []
            with futures.ThreadPoolExecutor(max_workers=self.config["transfer"]["thread_count"]) as download_executor:
                download_jobs = []
                # Per-file downloaded-byte estimates; in-memory (tuple) entries
                # need no download and are excluded.
                download_progress_per_file = {
                    basebackup_data_file: 0
                    for basebackup_data_file, _ in basebackup_data_files
                    if not isinstance(basebackup_data_file, tuple)
                }

                def download_progress(end=""):
                    # report max once per second
                    if time.monotonic() - progress_report_time[0] < 1:
                        return
                    progress_report_time[0] = time.monotonic()

                    total_downloaded = sum(download_progress_per_file.values())
                    if total_download_size <= 0:
                        progress = 0
                    else:
                        progress = total_downloaded / total_download_size
                    print("\rDownload progress: {progress:.2%} ({dl_mib:.0f} / {total_mib:.0f} MiB)\r".format(
                        progress=progress,
                        dl_mib=total_downloaded / (1024 ** 2),
                        total_mib=total_download_size / (1024 ** 2),
                    ), end=end)

                for basebackup_data_file, backup_data_file_size in basebackup_data_files:
                    if isinstance(basebackup_data_file, tuple):
                        # Already in memory (the v2 metadata object) - submit
                        # straight to extraction, no download needed.
                        tmp_obj, tmp_metadata = basebackup_data_file
                        extract_jobs.append(extract_executor.submit(
                            self.extract_one_backup,
                            obj=tmp_obj,
                            metadata=tmp_metadata,
                            pgdata=pgdata,
                            site=site,
                            tablespaces=tablespaces,
                        ))
                        continue

                    # Default arguments bind the current loop values, avoiding
                    # the late-binding closure pitfall.
                    def single_download_progress(current_pos, expected_max,
                                                 this_file_name=basebackup_data_file,
                                                 this_file_size=backup_data_file_size):
                        download_progress_per_file[this_file_name] = this_file_size * (current_pos / expected_max)
                        download_progress()

                    # NOTE: Most of the transfer clients aren't thread-safe, so initialize a new transfer
                    # client for each download.  We could use thread local storage or pooling here, but
                    # probably not worth the trouble for this use case.
                    transfer = get_transfer(common.get_object_storage_config(self.config, site))
                    download_jobs.append(download_executor.submit(
                        self.download_one_backup,
                        basebackup_data_file=basebackup_data_file,
                        progress_callback=single_download_progress,
                        site=site,
                        transfer=transfer,
                    ))

                for future in futures.as_completed(download_jobs):
                    if future.exception():
                        self.log.error("Got error from chunk download: %s", future.exception())
                        download_errors += 1
                        continue

                    # Feed the downloaded object straight into extraction.
                    tmp_obj, tmp_metadata = future.result()
                    extract_jobs.append(extract_executor.submit(
                        self.extract_one_backup,
                        obj=tmp_obj,
                        metadata=tmp_metadata,
                        pgdata=pgdata,
                        site=site,
                        tablespaces=tablespaces,
                    ))

                # Force one final progress line by resetting the rate limiter.
                progress_report_time[0] = 0
                download_progress(end="\n")

            for future in futures.as_completed(extract_jobs):
                if future.exception():
                    self.log.error("Got error from chunk extraction: %s", future.exception())
                    extract_errors += 1
                    continue

        # Fail only after all jobs have been drained so every error is logged.
        if download_errors:
            raise RestoreError("Backup download failed with {} errors".format(download_errors))
        if extract_errors:
            raise RestoreError("Backup extraction failed with {} errors".format(extract_errors))

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
# Example #11
# File: restore.py  Project: ohmu/pghoard
    def _get_basebackup(self, pgdata, basebackup, site,
                        debug=False,
                        status_output_file=None,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None,
                        tablespace_base_dir=None):
        """Fetch a basebackup into *pgdata* and write the recovery configuration.

        Resolves which stored basebackup to restore ("latest", an explicit
        backup name, or the one nearest to *recovery_target_time*), validates
        $PGDATA and all tablespace target directories, delegates the actual
        download and extraction to BasebackupFetcher, and finally writes the
        recovery configuration into *pgdata*.

        At most one of recovery_target_name / recovery_target_time /
        recovery_target_xid may be given.

        Raises RestoreError for conflicting recovery targets, unusable target
        directories or unknown tablespace mappings.
        """
        # Refuse ambiguous recovery targets - only one may be specified.
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # Work on a private copy of the mapping: entries are .pop()'d while
        # mapping tablespaces below, and the documented default of None would
        # otherwise raise AttributeError as soon as the backup contains
        # tablespaces.  Copying also avoids mutating the caller's dict.
        tablespace_mapping = dict(tablespace_mapping) if tablespace_mapping else {}

        # If the basebackup is given as a target time, "latest" or a plain
        # name, figure out which stored backup object that actually is.
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex)) from ex
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()
        elif isinstance(basebackup, str):
            basebackup = self._find_basebackup_for_name(basebackup)

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup["name"])
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            # Each entry is [storage_key, expected_size]
            basebackup_data_files = [
                [
                    os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup_chunk", chunk["chunk_filename"]),
                    chunk["result_size"],
                ]
                for chunk in bmeta["chunks"]
            ]
            # We need the files from the main basebackup file too.  It is
            # already in memory, so it is queued as a (bytes, metadata) tuple
            # with zero size instead of a storage key.
            basebackup_data_files.append([(bmeta_compressed, metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
            # we just care that the dir exists, but we're OK if there are other objects there
            raise RestoreError("Tablespace base directory {!r} does not exist, aborting."
                               .format(tablespace_base_dir))

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if tablespace_base_dir and not os.path.exists(tspath):
                # For missing, unmapped paths create an oid-named directory under the base dir
                tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
                os.makedirs(tspath, exist_ok=True)
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            # restrict newly created directories to the owner only
            os.chmod(dirname, 0o700)

        # Download and extract all backup data files into place.
        fetcher = BasebackupFetcher(
            app_config=self.config,
            data_files=basebackup_data_files,
            status_output_file=status_output_file,
            debug=debug,
            pgdata=pgdata,
            site=site,
            tablespaces=tablespaces,
        )
        fetcher.fetch_all()

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")