Exemple #1
0
    def _find_nearest_basebackup(self, recovery_target_time=None):
        """Pick the newest basebackup that finished before recovery_target_time.

        When no target time is given every available basebackup qualifies and
        the most recent one is chosen.  Returns the selected backup's name.
        Raises RestoreError when no backup qualifies.
        """
        candidates = []
        for backup in self.storage.list_basebackups():
            if recovery_target_time:
                # The backup end time would be the right value to compare, but
                # pg_basebackup based backup methods only record the start
                # time, so fall back to that when end-time is absent.
                meta = backup["metadata"]
                ts_source = meta["end-time"] if "end-time" in meta else meta["start-time"]
                if dates.parse_timestamp(ts_source) >= recovery_target_time:
                    continue
            candidates.append(backup)

        if not candidates:
            raise RestoreError("No applicable basebackups found, exiting")

        # end-time may be missing, so order on start-time; ordering matches
        candidates.sort(key=lambda backup: backup["metadata"]["start-time"])
        suffix = "" if len(candidates) == 1 else "s"
        caption = "Found {} applicable basebackup{}".format(len(candidates), suffix)
        print_basebackup_list(candidates, caption=caption)

        selected = candidates[-1]["name"]
        print("\nSelecting {!r} for restore".format(selected))
        return selected
Exemple #2
0
def test_parse_timestamp():
    """Exercise parse_timestamp round-trips for named and unknown timezones."""
    local_aware = datetime.datetime.now(dateutil.tz.tzlocal())

    # Turn e.g. "2021-02-08T09:58:27.988218-05:00" into
    # "2021-02-08T09:58:27.988218 EST": keep the date and the naive time
    # component, then append the abbreviated timezone name.
    str_date, str_localtime_aware = local_aware.isoformat().split("T", 1)
    str_localtime_naive = re.split("[+-]", str_localtime_aware, maxsplit=1)[0]
    str_local_aware_named = "{}T{} {}".format(str_date, str_localtime_naive, local_aware.tzname())

    # A named-zone timestamp must round-trip both as aware and as naive
    assert parse_timestamp(str_local_aware_named) == local_aware
    local_naive = parse_timestamp(str_local_aware_named, with_tz=False, assume_local=True)
    assert local_naive == local_aware.replace(tzinfo=None)

    # An unrecognized timezone name is interpreted as UTC by default
    str_unknown_aware = "2017-02-02 12:00:00 XYZ"
    unknown_aware_utc = parse_timestamp(str_unknown_aware)
    assert unknown_aware_utc.tzinfo == datetime.timezone.utc
    assert unknown_aware_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    # ...unless assume_local is set, in which case the local zone applies
    if local_aware.tzname() in ["EET", "EEST"]:
        unknown_aware_local = parse_timestamp(str_unknown_aware, assume_local=True)
        assert unknown_aware_local.tzinfo == dateutil.tz.tzlocal()
        assert unknown_aware_local.isoformat() == "2017-02-02T12:00:00+02:00"
Exemple #3
0
    def _find_nearest_basebackup(self, recovery_target_time=None):
        """Return the newest basebackup entry finished before recovery_target_time.

        Without a target time the latest available basebackup is returned.
        Raises RestoreError when no backup qualifies.
        """
        applicable = []
        for entry in self.storage.list_basebackups():
            if recovery_target_time:
                # The backup end time is what we really want here, but
                # pg_basebackup based methods only record the start time,
                # so use that as a fallback.
                metadata = entry["metadata"]
                if "end-time" in metadata:
                    entry_ts = dates.parse_timestamp(metadata["end-time"])
                else:
                    entry_ts = dates.parse_timestamp(metadata["start-time"])
                if entry_ts >= recovery_target_time:
                    continue
            applicable.append(entry)

        if not applicable:
            raise RestoreError("No applicable basebackups found, exiting")

        # end-time may be missing; sorting on start-time keeps the same order
        applicable.sort(key=lambda entry: entry["metadata"]["start-time"])
        count = len(applicable)
        caption = "Found {} applicable basebackup{}".format(count, "" if count == 1 else "s")
        print_basebackup_list(applicable, caption=caption)

        selected = applicable[-1]
        print("\nSelecting {!r} for restore".format(selected["name"]))
        return selected
Exemple #4
0
def print_basebackup_list(basebackups, *, caption="Available basebackups", verbose=True):
    """Print a table of basebackups with their sizes and UTC start times.

    Each entry must carry "name", "size" and a "metadata" dict with a
    "start-time" (string or datetime).  With verbose=True the remaining
    metadata is printed under each row.
    """
    print(caption, "\n")
    row = "{name:40}  {size:>11}  {orig_size:>11}  {time:20}".format
    print(row(name="Basebackup", size="Backup size", time="Start time", orig_size="Orig size"))
    print(row(name="-" * 40, size="-" * 11, time="-" * 20, orig_size="-" * 11))
    for backup in sorted(basebackups, key=lambda entry: entry["name"]):
        meta = backup["metadata"].copy()
        start_time = meta.pop("start-time")
        if isinstance(start_time, str):
            start_time = dates.parse_timestamp(start_time)
        if start_time.tzinfo:
            # Normalize aware timestamps to naive UTC for uniform display
            start_time = start_time.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        time_str = start_time.isoformat()[:19] + "Z"  # pylint: disable=no-member
        size_str = "{} MB".format(backup["size"] // (1024 ** 2))
        orig_size = int(meta.pop("original-file-size", 0) or 0)
        orig_size_str = "{} MB".format(orig_size // (1024 ** 2)) if orig_size else "n/a"
        print(row(name=backup["name"], size=size_str, time=time_str, orig_size=orig_size_str))
        if verbose:
            print("    metadata:", meta)
Exemple #5
0
def test_parse_timestamp():
    """Exercise parse_timestamp round-trips for named and unknown timezones."""
    local_aware = datetime.datetime.now(dateutil.tz.tzlocal())

    # Strip the UTC offset by dropping tzinfo instead of splitting on "+":
    # isoformat() of a zone west of UTC ends in "-HH:MM", which a naive
    # split("+") would leave in place and break the round-trip asserts below.
    str_local_aware_naive = local_aware.replace(tzinfo=None).isoformat()
    str_local_aware_named = "{} {}".format(str_local_aware_naive, local_aware.tzname())

    assert parse_timestamp(str_local_aware_named) == local_aware
    local_naive = parse_timestamp(str_local_aware_named, with_tz=False, assume_local=True)
    assert local_naive == local_aware.replace(tzinfo=None)

    # An unrecognized timezone name is interpreted as UTC by default
    str_unknown_aware = "2017-02-02 12:00:00 XYZ"
    unknown_aware_utc = parse_timestamp(str_unknown_aware)
    assert unknown_aware_utc.tzinfo == datetime.timezone.utc
    assert unknown_aware_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    # ...unless assume_local is set, in which case the local zone applies
    if local_aware.tzname() in ["EET", "EEST"]:
        unknown_aware_local = parse_timestamp(str_unknown_aware, assume_local=True)
        assert unknown_aware_local.tzinfo == dateutil.tz.tzlocal()
        assert unknown_aware_local.isoformat() == "2017-02-02T12:00:00+02:00"
Exemple #6
0
 def patch_basebackup_info(self, *, entry, site_config):
     """Normalize a basebackup listing entry in place.

     Strips the storage path from the name, parses timestamp metadata and
     backfills scheduling-related fields that backups made by old PGHoard
     versions do not carry, so later logic can rely on their presence.
     """
     entry["name"] = os.path.basename(entry["name"])
     metadata = entry["metadata"]
     metadata["start-time"] = dates.parse_timestamp(metadata["start-time"])
     # Old PGHoard versions did not record a decision time; use the start
     # time as the best guess when it is missing.
     if "backup-decision-time" in metadata:
         metadata["backup-decision-time"] = dates.parse_timestamp(metadata["backup-decision-time"])
     else:
         metadata["backup-decision-time"] = metadata["start-time"]
     # Backups without an explicit reason are assumed to have been scheduled
     metadata.setdefault("backup-reason", "scheduled")
     # Derive the normalized backup time from the start time when absent
     if "normalized-backup-time" not in metadata:
         metadata["normalized-backup-time"] = self.get_normalized_backup_time(site_config, now=metadata["start-time"])
Exemple #7
0
 def parse_backup_label(self, backup_label_data):
     """Extract the starting WAL segment and start time from a backup label.

     Accepts the label contents as bytes or str (str is encoded as UTF-8,
     matching the other parse_backup_label implementation) and returns the
     tuple (start_wal_segment, start_time) where start_time is an ISO-8601
     string.
     """
     if isinstance(backup_label_data, str):
         backup_label_data = backup_label_data.encode("utf-8")
     for line in backup_label_data.split(b"\n"):
         if line.startswith(b"START WAL LOCATION"):
             start_wal_segment = line.split()[5].strip(b")").decode("utf8")
         elif line.startswith(b"START TIME: "):
             start_time_text = line[len("START TIME: "):].decode("utf8")
             # The label carries server-local time without a UTC offset
             start_time_dt = dates.parse_timestamp(start_time_text,
                                                   assume_local=True)
             start_time = start_time_dt.isoformat()
     self.log.debug("Found: %r as starting wal segment, start_time: %r",
                    start_wal_segment, start_time)
     return start_wal_segment, start_time
Exemple #8
0
 def parse_backup_label(self, backup_label_data):
     """Return (start_wal_segment, start_time) parsed from backup label data.

     The label may be given as str or bytes; start_time is an ISO-8601
     string parsed from the label's local-time START TIME field.
     """
     if isinstance(backup_label_data, str):
         backup_label_data = backup_label_data.encode("utf-8")
     for label_line in backup_label_data.split(b"\n"):
         if label_line.startswith(b"START WAL LOCATION"):
             # e.g. "START WAL LOCATION: 0/9000028 (file 000...009)"
             start_wal_segment = label_line.split()[5].strip(b")").decode("utf8")
         elif label_line.startswith(b"START TIME: "):
             raw_time = label_line[len("START TIME: "):].decode("utf8")
             # The label carries server-local time without a UTC offset
             start_time = dates.parse_timestamp(raw_time, assume_local=True).isoformat()
     self.log.debug("Found: %r as starting wal segment, start_time: %r",
                    start_wal_segment, start_time)
     return start_wal_segment, start_time
Exemple #9
0
def test_parse_timestamp():
    """Exercise parse_timestamp round-trips for named and unknown timezones."""
    local_aware = datetime.datetime.now(dateutil.tz.tzlocal())

    # Strip the UTC offset by dropping tzinfo instead of splitting on "+":
    # isoformat() of a zone west of UTC ends in "-HH:MM", which a naive
    # split("+") would leave in place and break the round-trip asserts below.
    str_local_aware_naive = local_aware.replace(tzinfo=None).isoformat()
    str_local_aware_named = "{} {}".format(str_local_aware_naive,
                                           local_aware.tzname())

    assert parse_timestamp(str_local_aware_named) == local_aware
    local_naive = parse_timestamp(str_local_aware_named,
                                  with_tz=False,
                                  assume_local=True)
    assert local_naive == local_aware.replace(tzinfo=None)

    # An unrecognized timezone name is interpreted as UTC by default
    str_unknown_aware = "2017-02-02 12:00:00 XYZ"
    unknown_aware_utc = parse_timestamp(str_unknown_aware)
    assert unknown_aware_utc.tzinfo == datetime.timezone.utc
    assert unknown_aware_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    # ...unless assume_local is set, in which case the local zone applies
    if local_aware.tzname() in ["EET", "EEST"]:
        unknown_aware_local = parse_timestamp(str_unknown_aware,
                                              assume_local=True)
        assert unknown_aware_local.tzinfo == dateutil.tz.tzlocal()
        assert unknown_aware_local.isoformat() == "2017-02-02T12:00:00+02:00"
Exemple #10
0
    def get_remote_basebackups_info(self, site):
        """List the remotely stored basebackups of *site*, oldest first.

        A transfer object for the site is created and cached on first use.
        """
        storage = self.site_transfers.get(site)
        if not storage:
            # Lazily create and cache the transfer object for this site
            storage = get_transfer(get_object_storage_config(self.config, site))
            self.site_transfers[site] = storage

        site_prefix = self.config["backup_sites"][site]["prefix"]
        results = storage.list_path(os.path.join(site_prefix, "basebackup"))
        for entry in results:
            # Drop the storage path from the name and parse the timestamp
            entry["name"] = os.path.basename(entry["name"])
            entry["metadata"]["start-time"] = dates.parse_timestamp(entry["metadata"]["start-time"])

        results.sort(key=lambda entry: entry["metadata"]["start-time"])
        return results
Exemple #11
0
    def get_remote_basebackups_info(self, site):
        """Return the site's remote basebackup entries sorted by start time.

        The transfer object for the site is cached in self.site_transfers.
        """
        storage = self.site_transfers.get(site)
        if not storage:
            storage_config = get_object_storage_config(self.config, site)
            storage = get_transfer(storage_config)
            self.site_transfers[site] = storage

        basebackup_path = os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup")
        entries = storage.list_path(basebackup_path)
        for item in entries:
            item["name"] = os.path.basename(item["name"])  # strip the storage path
            item["metadata"]["start-time"] = dates.parse_timestamp(item["metadata"]["start-time"])

        entries.sort(key=lambda item: item["metadata"]["start-time"])
        return entries
Exemple #12
0
def print_basebackup_list(basebackups, *, caption="Available basebackups", verbose=True):
    """Print a formatted table of basebackups with sizes and UTC start times.

    Each entry must carry "name", "size" and a "metadata" dict with a
    "start-time" (string or datetime).  With verbose=True the remaining
    metadata is printed under each row.
    """
    print(caption, "\n")
    row = "{name:40}  {size:>11}  {orig_size:>11}  {time:20}".format
    print(row(name="Basebackup", size="Backup size", time="Start time", orig_size="Orig size"))
    print(row(name="-" * 40, size="-" * 11, time="-" * 20, orig_size="-" * 11))
    for backup in sorted(basebackups, key=lambda entry: entry["name"]):
        meta = backup["metadata"].copy()
        start_time = meta.pop("start-time")
        if isinstance(start_time, str):
            start_time = dates.parse_timestamp(start_time)
        if start_time.tzinfo:
            # Normalize aware timestamps to naive UTC for uniform display
            start_time = start_time.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        time_str = start_time.isoformat()[:19] + "Z"  # pylint: disable=no-member
        # Prefer the size totals recorded in metadata, falling back to the
        # object size / legacy original-file-size field
        size_str = "{} MB".format(int(meta.get("total-size-enc", backup["size"])) // (1024 ** 2))
        orig_size = int(meta.get("total-size-plain", meta.get("original-file-size")) or 0)
        orig_size_str = "{} MB".format(orig_size // (1024 ** 2)) if orig_size else "n/a"
        print(row(name=backup["name"], size=size_str, time=time_str, orig_size=orig_size_str))
        if verbose:
            print("    metadata:", meta)
Exemple #13
0
    def _get_basebackup(self,
                        pgdata,
                        basebackup,
                        site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        """Download and extract a basebackup into pgdata, then write recovery
        configuration so PostgreSQL can replay WAL from this PGHoard site.

        basebackup is a backup name or "latest"; when recovery_target_time is
        given, the nearest earlier backup is selected instead.  At most one of
        recovery_target_name, recovery_target_time and recovery_target_xid may
        be set.  Raises RestoreError for conflicting targets, unusable target
        directories or tablespace mappings not present in the backup.
        """
        # Reject ambiguous recovery targets: at most one may be specified
        targets = [
            recovery_target_name, recovery_target_time, recovery_target_xid
        ]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(
                    recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(
                    recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError(
                "$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                .format(pgdata))

        # Tablespace info is stored as tablespace-name-<oid> / tablespace-path-<oid>
        # pairs in the backup's object storage metadata
        tablespaces = {}
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            # User-provided mapping overrides the path recorded in the backup
            tspath = tablespace_mapping.pop(
                tsname, metadata["tablespace-path-{}".format(tsoid)])
            if not os.path.exists(tspath):
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} does not exist, aborting."
                    .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                    .format(tsname, tspath))

            print("Using existing empty directory {!r} for tablespace {!r}".
                  format(tspath, tsname))
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError(
                "Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError(
                    "{} target directory {!r} is empty, but not writable, aborting."
                    .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            # Restrict access to the newly created data directories
            os.chmod(dirname, 0o700)

        def download_progress(current_pos, expected_max, end=""):
            # Progress callback: rewrites the same console line via "\r"
            print("\rDownload progress: {:.2%}".format(current_pos /
                                                       expected_max),
                  end=end)

        # Download into a temporary spool file, then decrypt/decompress and extract
        with tempfile.TemporaryFile(dir=self.config["backup_location"],
                                    prefix="basebackup.",
                                    suffix=".pghoard") as tmp:
            self.storage.get_basebackup_file_to_fileobj(
                basebackup, tmp, progress_callback=download_progress)
            download_progress(1, 1, end="\n")
            tmp.seek(0)

            with rohmufile.file_reader(fileobj=tmp,
                                       metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(
                                           self.config, site)) as input_obj:
                # pghoard-bb-v1 archives carry tablespaces; anything else is
                # extracted as a plain basebackup
                if metadata.get("format") == "pghoard-bb-v1":
                    self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
                else:
                    self._extract_basic(input_obj, pgdata)

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" %
              pgdata)
        print(
            "On systemd based systems you can run systemctl start postgresql")
        print(
            "On SYSV Init based systems you can run /etc/init.d/postgresql start"
        )
Exemple #14
0
    def _get_basebackup(self,
                        pgdata,
                        basebackup,
                        site,
                        debug=False,
                        status_output_file=None,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None,
                        tablespace_base_dir=None):
        """Fetch and restore a basebackup into pgdata, then write recovery
        configuration so PostgreSQL can replay WAL from this PGHoard site.

        basebackup is a backup entry dict, a backup name, or "latest"; when
        recovery_target_time is given, the nearest earlier backup is selected
        instead.  At most one of recovery_target_name, recovery_target_time
        and recovery_target_xid may be set.  Raises RestoreError for
        conflicting targets, unusable target directories or tablespace
        mappings not present in the backup.
        """
        # Reject ambiguous recovery targets: at most one may be specified
        targets = [
            recovery_target_name, recovery_target_time, recovery_target_xid
        ]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(
                    recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(
                    recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()
        elif isinstance(basebackup, str):
            # A plain name was given; resolve it to a full backup entry
            basebackup = self._find_basebackup_for_name(basebackup)

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup["name"])
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError(
                "$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed),
                                       metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(
                                           self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            # v2 backups are split into chunk objects listed in the metadata
            basebackup_data_files = [[
                os.path.join(self.config["backup_sites"][site]["prefix"],
                             "basebackup_chunk", chunk["chunk_filename"]),
                chunk["result_size"],
            ] for chunk in bmeta["chunks"]]
            # We need the files from the main basebackup file too
            basebackup_data_files.append([(bmeta_compressed, metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        if tablespace_base_dir and not os.path.exists(
                tablespace_base_dir) and not overwrite:
            # we just care that the dir exists, but we're OK if there are other objects there
            raise RestoreError(
                "Tablespace base directory {!r} does not exist, aborting.".
                format(tablespace_base_dir))

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            # User-provided mapping overrides the path recorded in the backup
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if tablespace_base_dir and not os.path.exists(tspath):
                # Fall back to a per-OID directory under tablespace_base_dir
                tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
                os.makedirs(tspath, exist_ok=True)
            if not os.path.exists(tspath):
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} does not exist, aborting."
                    .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError(
                    "Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                    .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".
                  format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError(
                "Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError(
                    "{} target directory {!r} is empty, but not writable, aborting."
                    .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            # Restrict access to the newly created data directories
            os.chmod(dirname, 0o700)

        # Download and extract all data files into place
        fetcher = BasebackupFetcher(
            app_config=self.config,
            data_files=basebackup_data_files,
            status_output_file=status_output_file,
            debug=debug,
            pgdata=pgdata,
            site=site,
            tablespaces=tablespaces,
        )
        fetcher.fetch_all()

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" %
              pgdata)
        print(
            "On systemd based systems you can run systemctl start postgresql")
        print(
            "On SYSV Init based systems you can run /etc/init.d/postgresql start"
        )
Exemple #15
0
    def _get_basebackup(self, pgdata, basebackup, site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup)
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            basebackup_data_files = [
                [
                    os.path.join(self.config["path_prefix"], site, "basebackup_chunk", chunk["chunk_filename"]),
                    chunk["result_size"],
                ]
                for chunk in bmeta["chunks"]
            ]
            # We need the files from the main basebackup file too
            basebackup_data_files.append([(io.BytesIO(bmeta_compressed), metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup, -1]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup, -1]]

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            os.chmod(dirname, 0o700)

        total_download_size = sum(item[1] for item in basebackup_data_files)
        progress_report_time = [0]
        download_errors = 0
        extract_errors = 0

        with futures.ThreadPoolExecutor(max_workers=self.config["compression"]["thread_count"]) as extract_executor:
            extract_jobs = []
            with futures.ThreadPoolExecutor(max_workers=self.config["transfer"]["thread_count"]) as download_executor:
                download_jobs = []
                download_progress_per_file = {
                    basebackup_data_file: 0
                    for basebackup_data_file, _ in basebackup_data_files
                    if not isinstance(basebackup_data_file, tuple)
                }

                def download_progress(end=""):
                    # report max once per second
                    if time.monotonic() - progress_report_time[0] < 1:
                        return
                    progress_report_time[0] = time.monotonic()

                    total_downloaded = sum(download_progress_per_file.values())
                    if total_download_size <= 0:
                        progress = 0
                    else:
                        progress = total_downloaded / total_download_size
                    print("\rDownload progress: {progress:.2%} ({dl_mib:.0f} / {total_mib:.0f} MiB)\r".format(
                        progress=progress,
                        dl_mib=total_downloaded / (1024 ** 2),
                        total_mib=total_download_size / (1024 ** 2),
                    ), end=end)

                for basebackup_data_file, backup_data_file_size in basebackup_data_files:
                    if isinstance(basebackup_data_file, tuple):
                        tmp_obj, tmp_metadata = basebackup_data_file
                        extract_jobs.append(extract_executor.submit(
                            self.extract_one_backup,
                            obj=tmp_obj,
                            metadata=tmp_metadata,
                            pgdata=pgdata,
                            site=site,
                            tablespaces=tablespaces,
                        ))
                        continue

                    def single_download_progress(current_pos, expected_max,
                                                 this_file_name=basebackup_data_file,
                                                 this_file_size=backup_data_file_size):
                        download_progress_per_file[this_file_name] = this_file_size * (current_pos / expected_max)
                        download_progress()

                    # NOTE: Most of the transfer clients aren't thread-safe, so initialize a new transfer
                    # client for each download.  We could use thread local storage or pooling here, but
                    # probably not worth the trouble for this use case.
                    transfer = get_transfer(common.get_object_storage_config(self.config, site))
                    download_jobs.append(download_executor.submit(
                        self.download_one_backup,
                        basebackup_data_file=basebackup_data_file,
                        progress_callback=single_download_progress,
                        site=site,
                        transfer=transfer,
                    ))

                for future in futures.as_completed(download_jobs):
                    if future.exception():
                        self.log.error("Got error from chunk download: %s", future.exception())
                        download_errors += 1
                        continue

                    tmp_obj, tmp_metadata = future.result()
                    extract_jobs.append(extract_executor.submit(
                        self.extract_one_backup,
                        obj=tmp_obj,
                        metadata=tmp_metadata,
                        pgdata=pgdata,
                        site=site,
                        tablespaces=tablespaces,
                    ))

                progress_report_time[0] = 0
                download_progress(end="\n")

            for future in futures.as_completed(extract_jobs):
                if future.exception():
                    self.log.error("Got error from chunk extraction: %s", future.exception())
                    extract_errors += 1
                    continue

        if download_errors:
            raise RestoreError("Backup download failed with {} errors".format(download_errors))
        if extract_errors:
            raise RestoreError("Backup extraction failed with {} errors".format(extract_errors))

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
Exemple #16
0
    def _get_basebackup(self, pgdata, basebackup, site,
                        debug=False,
                        status_output_file=None,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None,
                        tablespace_base_dir=None):
        """Restore a basebackup into *pgdata* and write recovery configuration for it.

        Resolves which basebackup to use (an explicit name, "latest", or the
        newest one before ``recovery_target_time``), validates the $PGDATA and
        tablespace target directories, downloads and extracts the backup data
        via BasebackupFetcher and finally writes the recovery configuration.

        :param pgdata: target data directory for the restored cluster
        :param basebackup: backup name, "latest", or an already-resolved backup dict
        :param site: backup site name used for storage and key lookups
        :param overwrite: wipe and recreate a non-empty $PGDATA if True
        :param tablespace_mapping: optional {tablespace_name: target_path} overrides
        :param tablespace_base_dir: optional directory under which unmapped
            tablespaces are created as <base_dir>/<oid>
        :raises RestoreError: on any validation or restoration failure
        """
        # Callers may pass tablespace_mapping=None; normalize to a dict so the
        # .pop() calls and the leftover-mapping check below work uniformly
        # (otherwise any backup containing tablespaces would crash with
        # AttributeError when no mapping was given).
        tablespace_mapping = tablespace_mapping or {}

        # The recovery targets are mutually exclusive - allow at most one
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If the requested basebackup is "latest" (or bounded by a recovery
        # target time), figure out which concrete backup that is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex)) from ex
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()
        elif isinstance(basebackup, str):
            basebackup = self._find_basebackup_for_name(basebackup)

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup["name"])
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            basebackup_data_files = [
                [
                    os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup_chunk", chunk["chunk_filename"]),
                    chunk["result_size"],
                ]
                for chunk in bmeta["chunks"]
            ]
            # We need the files from the main basebackup file too
            basebackup_data_files.append([(bmeta_compressed, metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

        if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
            # we just care that the dir exists, but we're OK if there are other objects there
            raise RestoreError("Tablespace base directory {!r} does not exist, aborting."
                               .format(tablespace_base_dir))

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if tablespace_base_dir and not os.path.exists(tspath):
                # No explicit mapping and original path is gone: create
                # <tablespace_base_dir>/<oid> as the target instead
                tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
                os.makedirs(tspath, exist_ok=True)
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            # PostgreSQL requires the data directory to be owner-only accessible
            os.chmod(dirname, 0o700)

        # Download and extract all backup data files into place
        fetcher = BasebackupFetcher(
            app_config=self.config,
            data_files=basebackup_data_files,
            status_output_file=status_output_file,
            debug=debug,
            pgdata=pgdata,
            site=site,
            tablespaces=tablespaces,
        )
        fetcher.fetch_all()

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")