Example #1
0
def test_storage_config(tmpdir):
    """Walk get_object_storage_config() through successive config states.

    Starts with no usable storage information, then adds a backup location,
    an empty ``object_storage`` section and finally an unknown storage type,
    checking the returned config or the raised error at every step.
    """
    site_name = "default"
    config = {"backup_location": None}
    # No "backup_sites" entry yet: nothing can be resolved
    assert get_object_storage_config(config, site_name) is None

    backup_sites = config.setdefault("backup_sites", {})
    site_config = backup_sites.setdefault(site_name, {})
    # An empty site entry with a None backup_location still resolves to None
    assert get_object_storage_config(config, site_name) is None

    # With only a backup location set, a local storage config is expected
    config["backup_location"] = tmpdir.strpath
    expected_local = {"directory": tmpdir.strpath, "storage_type": "local"}
    assert get_object_storage_config(config, site_name) == expected_local

    # An object_storage section without storage_type is a configuration error
    site_config["object_storage"] = {}
    with pytest.raises(errors.InvalidConfigurationError) as excinfo:
        get_object_storage_config(config, site_name)
    assert "storage_type not defined in site 'default'" in str(excinfo.value)

    # Unknown types pass through the config lookup unchanged ...
    site_config["object_storage"] = {"storage_type": "foo", "other": "bar"}
    foo_type_conf = get_object_storage_config(config, site_name)
    assert foo_type_conf == {"storage_type": "foo", "other": "bar"}

    # ... and are only rejected when a transfer object is requested
    with pytest.raises(errors.InvalidConfigurationError) as excinfo:
        get_transfer(foo_type_conf)
    assert "unsupported storage type 'foo'" in str(excinfo.value)
Example #2
0
def _test_storage_init(storage_type,
                       with_prefix,
                       tmpdir,
                       config_overrides=None):
    """Build a storage config for ``storage_type`` and run ``_test_storage``.

    "local" uses a directory under ``tmpdir``; any other type is looked up as
    ``config_<storage_type>`` in ``test_storage_configs`` and the test is
    skipped when no such function exists. ``with_prefix`` adds a random
    prefix and ``config_overrides`` is merged on top of a copy of the config.
    """
    if storage_type != "local":
        try:
            conf_func = getattr(test_storage_configs, "config_" + storage_type)
        except AttributeError:
            pytest.skip(storage_type + " config isn't available")
        storage_config = conf_func()
    else:
        storage_config = {"directory": str(tmpdir.join("rohmu"))}

    # Several test targets share one driver implementation
    driver_aliases = {"aws_s3": "s3", "ceph_s3": "s3", "ceph_swift": "swift"}
    driver = driver_aliases.get(storage_type, storage_type)
    storage_config["storage_type"] = driver

    if with_prefix:
        storage_config["prefix"] = uuid.uuid4().hex

    if config_overrides:
        merged = storage_config.copy()
        merged.update(config_overrides)
        storage_config = merged

    _test_storage(get_transfer(storage_config), driver, tmpdir, storage_config)
Example #3
0
 def get_or_create_site_storage(self, site):
     """Return the transfer object for ``site``, creating it on first use.

     Transfers are cached in ``self.site_transfers``; a missing (or falsy)
     entry is built from the site's object storage config and stored.
     """
     cached = self.site_transfers.get(site)
     if cached:
         return cached
     transfer = get_transfer(get_object_storage_config(self.config, site))
     self.site_transfers[site] = transfer
     return transfer
Example #4
0
    def loop(self):
        """Handle download actions from ``self.queue_in`` until a falsy one arrives.

        For each action the remote object named by ``action["remote_key"]`` is
        written to ``action["local_file_name"]`` through a ``DecompressSink``
        wrapped in a ``DecryptSink``. One result dict (the action plus
        ``duration``, ``message`` and ``result``) is put on ``self.queue_out``
        per action, whether or not the download raised.
        """
        while True:
            action = self.queue_in.get()
            if not action:
                return
            started_at = time.monotonic()
            failure = None
            try:
                self.log.info("Starting to download %r", action["remote_key"])
                if self.transfer is None:
                    # The transfer is created on first use and reused afterwards
                    self.transfer = get_transfer(self.config["object_storage"])
                # TODO: Monitor progress
                try:
                    os.remove(action["local_file_name"])
                except OSError:
                    # Removal of a previous file is best effort only
                    pass
                with open(action["local_file_name"], "wb") as target_file:
                    decompressor = DecompressSink(target_file, action["compression_algorithm"])
                    sink = DecryptSink(decompressor, action["remote_file_size"], self.rsa_private_key_pem)
                    self.transfer.get_contents_to_fileobj(action["remote_key"], sink)
                    self.log.info(
                        "%r successfully saved as %r in %.2f seconds", action["remote_key"], action["local_file_name"],
                        time.monotonic() - started_at
                    )
            except Exception as ex:  # pylint: disable=broad-except
                failure = ex
                self.log.exception("An error occurred while handling action")

            duration = time.monotonic() - started_at
            # Convert exception to string as it might not be picklable
            if failure:
                message, status = str(failure), "failure"
            else:
                message, status = None, "success"
            self.queue_out.put({
                **action,
                "duration": duration,
                "message": message,
                "result": status,
            })
Example #5
0
def _test_storage_init(storage_type, with_prefix, tmpdir, config_overrides=None):
    """Prepare a transfer for ``storage_type`` and hand it to ``_test_storage``.

    The "local" type stores under ``tmpdir``; other types need a
    ``config_<storage_type>`` function in ``test_storage_configs`` and are
    skipped when it is missing. A random prefix is added when ``with_prefix``
    is set, and ``config_overrides`` is applied to a copy of the config.
    """
    if storage_type == "local":
        storage_config = {"directory": str(tmpdir.join("rohmu"))}
    else:
        config_factory = getattr(test_storage_configs, "config_" + storage_type, None)
        if config_factory is None:
            pytest.skip(storage_type + " config isn't available")
        storage_config = config_factory()

    # Map test target names onto the driver that implements them
    if storage_type == "ceph_swift":
        driver = "swift"
    elif storage_type in ("aws_s3", "ceph_s3"):
        driver = "s3"
    else:
        driver = storage_type
    storage_config["storage_type"] = driver

    if with_prefix:
        storage_config["prefix"] = uuid.uuid4().hex

    if config_overrides:
        overridden = storage_config.copy()
        overridden.update(config_overrides)
        storage_config = overridden

    transfer = get_transfer(storage_config)
    _test_storage(transfer, driver, tmpdir, storage_config)
Example #6
0
 def _choose_storage(self, storage=None):
     """Select the named storage and create its rohmu transfer.

     ``None`` or an empty string selects ``self.config.default_storage``.
     The chosen name, its config entry and the transfer are stored on
     ``self.storage_name``, ``self.storage_config`` and ``self.storage``.
     """
     name = storage
     if name is None or name == "":
         name = self.config.default_storage
     self.storage_name = name
     self.storage_config = self.config.storages[name]
     transfer_options = self.storage_config.dict(by_alias=True,
                                                 exclude_unset=True)
     self.storage = rohmu.get_transfer(transfer_options)
Example #7
0
    def _test_create_basebackup(self,
                                capsys,
                                db,
                                pghoard,
                                mode,
                                replica=False,
                                active_backup_mode='archive_command'):
        """Create a basebackup and check the CLI listing and stored metadata.

        The test site is switched to ``mode`` / ``active_backup_mode`` before
        the backup is requested; its result is awaited on a queue for up to 60
        seconds. End markers are only expected for the "local-tar" mode, and
        the end WAL segment only when ``replica`` is False.
        """
        site = pghoard.test_site
        pghoard.create_backup_site_paths(site)
        target_path = os.path.join(pghoard.config["backup_location"], site,
                                   "basebackup")
        result_queue = Queue()

        site_settings = pghoard.config["backup_sites"][site]
        site_settings["basebackup_mode"] = mode
        site_settings["active_backup_mode"] = active_backup_mode

        pghoard.create_basebackup(site, db.user, target_path, result_queue)
        outcome = result_queue.get(timeout=60)
        assert outcome["success"]

        # make sure it shows on the list
        list_args = [
            "list-basebackups",
            "--config", pghoard.config_path,
            "--site", site,
            "--verbose",
        ]
        Restore().run(list_args)
        out, _ = capsys.readouterr()
        for expected in (site, "pg-version", "start-wal-segment"):
            assert expected in out

        expects_end_markers = mode == "local-tar"
        if expects_end_markers:
            assert "end-time" in out
            if replica is False:
                assert "end-wal-segment" in out

        transfer = get_transfer(
            common.get_object_storage_config(pghoard.config, site))
        listing_prefix = os.path.join(
            pghoard.config["backup_sites"][site]["prefix"], "basebackup")
        for backup in transfer.list_path(listing_prefix):
            meta = backup["metadata"]
            assert "start-wal-segment" in meta
            assert "start-time" in meta
            # Parsed timestamps must carry timezone information
            assert dateutil.parser.parse(meta["start-time"]).tzinfo  # pylint: disable=no-member
            if expects_end_markers:
                if replica is False:
                    assert "end-wal-segment" in meta
                assert "end-time" in meta
                assert dateutil.parser.parse(meta["end-time"]).tzinfo  # pylint: disable=no-member
Example #8
0
    def _test_restore_basebackup(self, db, pghoard, tmpdir, active_backup_mode="archive_command"):
        """Restore the site's basebackup and compare it with the stored metadata.

        Exercises restoring into an empty directory, the refusal to restore on
        top of existing data without ``--overwrite``, and a restore with
        ``--overwrite``. The restored ``backup_label`` is then checked against
        the first listed backup's metadata, and the start WAL file is expected
        on disk only for the "standalone_hot_backup" mode.
        """
        backup_out = tmpdir.join("test-restore").strpath
        restore_args = [
            "get-basebackup",
            "--config", pghoard.config_path,
            "--site", pghoard.test_site,
            "--target-dir", backup_out,
        ]
        # Restoring to empty directory works
        os.makedirs(backup_out)
        Restore().run(list(restore_args))
        # Restoring on top of another $PGDATA doesn't
        with pytest.raises(RestoreError) as excinfo:
            Restore().run(list(restore_args))
        assert "--overwrite not specified" in str(excinfo.value)
        # Until we use the --overwrite flag
        Restore().run(restore_args + ["--overwrite"])
        check_call([os.path.join(db.pgbin, "pg_controldata"), backup_out])
        # TODO: check that the backup is valid

        # there should only be a single backup so lets compare what was in the metadata with what
        # was in the backup label
        transfer = get_transfer(common.get_object_storage_config(pghoard.config, pghoard.test_site))
        site_prefix = pghoard.config["backup_sites"][pghoard.test_site]["prefix"]
        backups = transfer.list_path(os.path.join(site_prefix, "basebackup"))

        # lets grab the backup label details for what we restored
        pgb = PGBaseBackup(config=None, site="foosite", connection_info=None,
                           basebackup_path=None, compression_queue=None, transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))

        label_path = os.path.join(backup_out, "backup_label")
        with open(label_path, "r") as label_file:
            label_text = label_file.read()
        start_wal_segment, start_time = pgb.parse_backup_label(label_text)

        stored_meta = backups[0]['metadata']
        assert start_wal_segment == stored_meta['start-wal-segment']
        assert start_time == stored_meta['start-time']

        # for a standalone hot backup, the start wal file will be in the pg_xlog / pg_wal directory
        wal_dir = "pg_wal" if float(db.pgver) >= float("10.0") else "pg_xlog"
        wal_path = os.path.join(backup_out, wal_dir, stored_meta['start-wal-segment'])
        wal_expected = active_backup_mode == "standalone_hot_backup"
        assert os.path.isfile(wal_path) is wal_expected
Example #9
0
    def get_object_storage(self, site_name):
        """Return the cached transfer for ``site_name``, building it if needed.

        A missing (or falsy) cache entry is created from the
        ``(storage_type, storage_config)`` pair returned by
        ``get_object_storage_config`` and stored in ``self.site_transfers``.
        """
        cached = self.site_transfers.get(site_name)
        if cached:
            return cached

        storage_type, storage_config = get_object_storage_config(self.config, site_name)
        transfer = get_transfer(storage_type, storage_config)
        self.site_transfers[site_name] = transfer
        return transfer
Example #10
0
 def set_config(self, config_file, site):
     """Load ``config_file`` and bind this object to one backup site.

     Sets ``self.config``, ``self.site`` (as resolved by
     ``config.get_site_from_config``), ``self.backup_site`` and
     ``self.storage`` (the transfer built from the site's storage config).
     """
     loaded = config.read_json_config_file(config_file, check_commands=False)
     self.config = loaded
     self.site = config.get_site_from_config(loaded, site)
     self.backup_site = loaded["backup_sites"][self.site]
     self.storage = get_transfer(
         common.get_object_storage_config(loaded, self.site))
Example #11
0
    def _test_create_basebackup(self, capsys, db, pghoard, mode, replica=False, active_backup_mode="archive_command"):
        """Create a basebackup with explicit metadata and verify how it is reported.

        The backup is requested with "scheduled" reason metadata stamped with
        the current UTC time. The CLI listing and the stored object metadata
        are then checked; end markers are expected only for the local_tar and
        delta modes, and the end WAL segment only when ``replica`` is False.
        """
        site = pghoard.test_site
        pghoard.create_backup_site_paths(site)
        target_path = os.path.join(pghoard.config["backup_location"], site, "basebackup")
        result_queue = Queue()

        site_settings = pghoard.config["backup_sites"][site]
        site_settings["basebackup_mode"] = mode
        site_settings["active_backup_mode"] = active_backup_mode

        now = datetime.datetime.now(datetime.timezone.utc)
        stamp = now.isoformat()
        metadata = {
            "backup-reason": "scheduled",
            "backup-decision-time": stamp,
            "normalized-backup-time": stamp,
        }
        pghoard.create_basebackup(site, db.user, target_path, result_queue, metadata)
        outcome = result_queue.get(timeout=60)
        assert outcome["success"]

        # make sure it shows on the list
        list_args = [
            "list-basebackups",
            "--config", pghoard.config_path,
            "--site", site,
            "--verbose",
        ]
        Restore().run(list_args)
        out, _ = capsys.readouterr()
        for expected in (site, "pg-version", "start-wal-segment"):
            assert expected in out

        expects_end_markers = mode in {BaseBackupMode.local_tar, BaseBackupMode.delta}
        if expects_end_markers:
            assert "end-time" in out
            if replica is False:
                assert "end-wal-segment" in out

        transfer = get_transfer(common.get_object_storage_config(pghoard.config, site))
        listing_prefix = os.path.join(pghoard.config["backup_sites"][site]["prefix"], "basebackup")
        for backup in transfer.list_path(listing_prefix):
            meta = backup["metadata"]
            assert "start-wal-segment" in meta
            assert "start-time" in meta
            assert dateutil.parser.parse(meta["start-time"]).tzinfo  # pylint: disable=no-member
            # The metadata passed to create_basebackup must be stored unchanged
            assert meta["backup-reason"] == "scheduled"
            assert meta["backup-decision-time"] == stamp
            assert meta["normalized-backup-time"] == stamp
            if expects_end_markers:
                if replica is False:
                    assert "end-wal-segment" in meta
                assert "end-time" in meta
                assert dateutil.parser.parse(meta["end-time"]).tzinfo  # pylint: disable=no-member
Example #12
0
def test_storage_config(tmpdir):
    """Check get_object_storage_config() as the config is filled in step by step.

    An empty config and an empty site entry both resolve to ``None``; a
    backup location gives a local storage config; an ``object_storage``
    section must name a storage type, and an unknown type is only rejected
    by ``get_transfer``.
    """
    site_name = "default"
    config = {}
    assert get_object_storage_config(config, site_name) is None

    backup_sites = config.setdefault("backup_sites", {})
    site_config = backup_sites.setdefault(site_name, {})
    assert get_object_storage_config(config, site_name) is None

    # Only a backup location: expect a local storage definition
    config["backup_location"] = tmpdir.strpath
    expected_local = {"directory": tmpdir.strpath, "storage_type": "local"}
    assert get_object_storage_config(config, site_name) == expected_local

    # Empty object_storage section: storage_type is required
    site_config["object_storage"] = {}
    with pytest.raises(errors.InvalidConfigurationError) as excinfo:
        get_object_storage_config(config, site_name)
    assert "storage_type not defined in site 'default'" in str(excinfo.value)

    # Unknown storage type is returned as-is by the config lookup
    site_config["object_storage"] = {"storage_type": "foo", "other": "bar"}
    foo_type_conf = get_object_storage_config(config, site_name)
    assert foo_type_conf == {"storage_type": "foo", "other": "bar"}

    # ... but cannot be turned into a transfer object
    with pytest.raises(errors.InvalidConfigurationError) as excinfo:
        get_transfer(foo_type_conf)
    assert "unsupported storage type 'foo'" in str(excinfo.value)
Example #13
0
    def get_remote_basebackups_info(self, site):
        """List the basebackups stored for ``site``, ordered by start time.

        The site's transfer is taken from ``self.site_transfers`` (created and
        cached if absent). Each listed entry is modified in place: its name is
        reduced to the base name and its "start-time" metadata is replaced by
        the result of ``dates.parse_timestamp``.
        """
        transfer = self.site_transfers.get(site)
        if not transfer:
            transfer = get_transfer(get_object_storage_config(self.config, site))
            self.site_transfers[site] = transfer

        listing_path = os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup")
        entries = transfer.list_path(listing_path)
        for entry in entries:
            # drop path from resulting list and convert timestamps
            meta = entry["metadata"]
            entry["name"] = os.path.basename(entry["name"])
            meta["start-time"] = dates.parse_timestamp(meta["start-time"])

        def start_time_of(entry):
            return entry["metadata"]["start-time"]

        entries.sort(key=start_time_of)
        return entries
Example #14
0
    def get_remote_basebackups_info(self, site):
        """List the basebackups stored for ``site``, ordered by start time.

        The site's transfer is taken from ``self.site_transfers`` (created and
        cached if absent). Every listed entry is passed to
        ``self.patch_basebackup_info`` together with the site's config before
        the list is sorted on its "start-time" metadata.
        """
        transfer = self.site_transfers.get(site)
        if not transfer:
            transfer = get_transfer(get_object_storage_config(self.config, site))
            self.site_transfers[site] = transfer

        site_config = self.config["backup_sites"][site]
        listing_path = os.path.join(site_config["prefix"], "basebackup")
        entries = transfer.list_path(listing_path)
        for entry in entries:
            self.patch_basebackup_info(entry=entry, site_config=site_config)

        def start_time_of(entry):
            return entry["metadata"]["start-time"]

        entries.sort(key=start_time_of)
        return entries
Example #15
0
    def get_remote_basebackups_info(self, site):
        """List the basebackups stored for ``site``, ordered by start time.

        The listing path is ``<path_prefix>/<site>/basebackup``. The site's
        transfer is taken from ``self.site_transfers`` (created and cached if
        absent). Each entry is modified in place: its name is reduced to the
        base name and its "start-time" metadata is parsed with
        ``dateutil.parser.parse``.
        """
        transfer = self.site_transfers.get(site)
        if not transfer:
            transfer = get_transfer(get_object_storage_config(self.config, site))
            self.site_transfers[site] = transfer

        listing_path = os.path.join(self.config["path_prefix"], site, "basebackup")
        entries = transfer.list_path(listing_path)
        for entry in entries:
            # drop path from resulting list and convert timestamps
            meta = entry["metadata"]
            entry["name"] = os.path.basename(entry["name"])
            meta["start-time"] = dateutil.parser.parse(meta["start-time"])

        def start_time_of(entry):
            return entry["metadata"]["start-time"]

        entries.sort(key=start_time_of)
        return entries
Example #16
0
def _remote_file_fetch_loop(app_config, task_queue, result_queue):
    transfers = {}
    while True:
        task = task_queue.get()
        if not task:
            return
        try:
            site, key, target_path = task
            transfer = transfers.get(site)
            if not transfer:
                transfer = get_transfer(get_object_storage_config(app_config, site))
                transfers[site] = transfer
            file_size, metadata = FileFetcher(app_config, transfer).fetch(site, key, target_path)
            result_queue.put((task, file_size, metadata))
        except Exception as e:  # pylint: disable=broad-except
            result_queue.put((task, e))
Example #17
0
def _remote_file_fetch_loop(app_config, task_queue, result_queue):
    transfers = {}
    while True:
        task = task_queue.get()
        if not task:
            return
        try:
            site, key, target_path = task
            transfer = transfers.get(site)
            if not transfer:
                transfer = get_transfer(get_object_storage_config(app_config, site))
                transfers[site] = transfer
            file_size, metadata = FileFetcher(app_config, transfer).fetch(site, key, target_path)
            result_queue.put((task, file_size, metadata))
        except Exception as e:  # pylint: disable=broad-except
            result_queue.put((task, e))
Example #18
0
def _test_storage_init(storage_type,
                       with_prefix,
                       tmpdir,
                       config_overrides=None):
    """Build a storage config for ``storage_type`` and run ``_test_storage``.

    * "local" stores under ``tmpdir``.
    * "sftp" reads ``username:password`` from the vagrant credentials file
      when that file exists; the test is skipped if the username is empty.
    * Anything else (including "sftp" without the credentials file) is looked
      up as ``config_<storage_type>`` in ``test_storage_configs`` and skipped
      when no such function exists.

    ``with_prefix`` adds a random prefix, and ``config_overrides`` is merged
    on top of a copy of the resulting config.
    """
    sftp_credentials_file = "/home/vagrant/pghoard-test-sftp-user"
    if storage_type == "local":
        storage_config = {"directory": str(tmpdir.join("rohmu"))}
    elif storage_type == "sftp" and os.path.isfile(sftp_credentials_file):
        with open(sftp_credentials_file, "r") as sftpuser:
            # Split on the first colon only so the password may contain ":"
            username, password = sftpuser.read().strip().split(":", 1)

        if not username:
            # Previously this fell through with storage_config unbound and
            # failed later with UnboundLocalError; skip explicitly instead.
            pytest.skip("{} config isn't available".format(storage_type))

        # to ensure we are testing that you can use other than port 22, vagrant uses port 23
        storage_config = {
            "server": "localhost",
            "port": 23,
            "username": username,
            "password": password
        }

        # for no prefix testing, we need to cleanup existing files
        # NOTE(review): the shell command is built from the credentials file
        # contents; acceptable only because that file is a trusted test fixture.
        os.system("sudo rm -rf /home/{}/*".format(username))
    else:
        try:
            conf_func = getattr(test_storage_configs,
                                "config_{}".format(storage_type))
        except AttributeError:
            pytest.skip("{} config isn't available".format(storage_type))
        storage_config = conf_func()

    if storage_type in ["aws_s3", "ceph_s3"]:
        driver = "s3"
    elif storage_type == "ceph_swift":
        driver = "swift"
    else:
        driver = storage_type
    storage_config["storage_type"] = driver

    if with_prefix:
        storage_config["prefix"] = uuid.uuid4().hex

    if config_overrides:
        storage_config = storage_config.copy()
        storage_config.update(config_overrides)

    st = get_transfer(storage_config)
    _test_storage(st, driver, tmpdir, storage_config)
Example #19
0
    def _test_create_basebackup(self, capsys, db, pghoard, mode, replica=False, active_backup_mode='archive_command'):
        """Create a basebackup and check the CLI listing and stored metadata.

        The test site is switched to ``mode`` / ``active_backup_mode`` before
        the backup is requested and its result awaited (60 second timeout).
        End markers are expected only for the "local-tar" mode, and the end
        WAL segment only when ``replica`` is False.
        """
        site = pghoard.test_site
        pghoard.create_backup_site_paths(site)
        target_path = os.path.join(pghoard.config["backup_location"], site, "basebackup")
        result_queue = Queue()

        site_settings = pghoard.config["backup_sites"][site]
        site_settings["basebackup_mode"] = mode
        site_settings["active_backup_mode"] = active_backup_mode

        pghoard.create_basebackup(site, db.user, target_path, result_queue)
        outcome = result_queue.get(timeout=60)
        assert outcome["success"]

        # make sure it shows on the list
        list_args = ["list-basebackups", "--config", pghoard.config_path, "--site", site, "--verbose"]
        Restore().run(list_args)
        out, _ = capsys.readouterr()
        for expected in (site, "pg-version", "start-wal-segment"):
            assert expected in out

        expects_end_markers = mode == "local-tar"
        if expects_end_markers:
            assert "end-time" in out
            if replica is False:
                assert "end-wal-segment" in out

        transfer = get_transfer(common.get_object_storage_config(pghoard.config, site))
        listing_prefix = os.path.join(pghoard.config["backup_sites"][site]["prefix"], "basebackup")
        for backup in transfer.list_path(listing_prefix):
            meta = backup["metadata"]
            assert "start-wal-segment" in meta
            assert "start-time" in meta
            # Parsed timestamps must carry timezone information
            assert dateutil.parser.parse(meta["start-time"]).tzinfo  # pylint: disable=no-member
            if expects_end_markers:
                if replica is False:
                    assert "end-wal-segment" in meta
                assert "end-time" in meta
                assert dateutil.parser.parse(meta["end-time"]).tzinfo  # pylint: disable=no-member
Example #20
0
 def _get_object_storage(self, site, pgdata):
     """Return an ``ObjectStore`` for ``site`` rooted at the global path prefix.

     A new transfer is built from the site's object storage config on every
     call.
     """
     transfer = get_transfer(
         common.get_object_storage_config(self.config, site))
     path_prefix = self.config["path_prefix"]
     return ObjectStore(transfer, path_prefix, site, pgdata)
Example #21
0
 def _get_object_storage(self, site, pgdata):
     """Return an ``ObjectStore`` for ``site`` using the site's own prefix.

     A new transfer is built from the site's object storage config on every
     call.
     """
     transfer = get_transfer(
         common.get_object_storage_config(self.config, site))
     site_prefix = self.config["backup_sites"][site]["prefix"]
     return ObjectStore(transfer, site_prefix, site, pgdata)
Example #22
0
 def _get_object_storage(self, site, pgdata):
     """Return an ``ObjectStore`` for ``site`` rooted at the global path prefix.

     A new transfer is built from the site's object storage config on every
     call.
     """
     transfer = get_transfer(get_object_storage_config(self.config, site))
     path_prefix = self.config["path_prefix"]
     return ObjectStore(transfer, path_prefix, site, pgdata)
Example #23
0
 def _create_transfer(self):
     """Build a new transfer object from this site's object storage config."""
     return get_transfer(
         common.get_object_storage_config(self.config, self.site))
Example #24
0
    def _test_restore_basebackup(self,
                                 db,
                                 pghoard,
                                 tmpdir,
                                 active_backup_mode="archive_command"):
        """Restore the site's basebackup and compare it with the stored metadata.

        Exercises restoring into an empty directory, the refusal to restore on
        top of existing data without ``--overwrite``, and a restore with
        ``--overwrite``. The restored ``backup_label`` is then checked against
        the first listed backup's metadata, and the start WAL file is expected
        on disk only for the "standalone_hot_backup" mode.
        """
        backup_out = tmpdir.join("test-restore").strpath
        restore_args = [
            "get-basebackup",
            "--config",
            pghoard.config_path,
            "--site",
            pghoard.test_site,
            "--target-dir",
            backup_out,
        ]
        # Restoring to empty directory works
        os.makedirs(backup_out)
        Restore().run(list(restore_args))
        # Restoring on top of another $PGDATA doesn't
        with pytest.raises(RestoreError) as excinfo:
            Restore().run(list(restore_args))
        assert "--overwrite not specified" in str(excinfo.value)
        # Until we use the --overwrite flag
        Restore().run(restore_args + ["--overwrite"])
        check_call([os.path.join(db.pgbin, "pg_controldata"), backup_out])
        # TODO: check that the backup is valid

        # there should only be a single backup so lets compare what was in the metadata with what
        # was in the backup label
        transfer = get_transfer(
            common.get_object_storage_config(pghoard.config,
                                             pghoard.test_site))
        site_prefix = pghoard.config["backup_sites"][
            pghoard.test_site]["prefix"]
        backups = transfer.list_path(os.path.join(site_prefix, "basebackup"))

        # lets grab the backup label details for what we restored
        pgb = PGBaseBackup(config=None,
                           site="foosite",
                           connection_info=None,
                           basebackup_path=None,
                           compression_queue=None,
                           transfer_queue=None,
                           metrics=metrics.Metrics(statsd={}))

        label_path = os.path.join(backup_out, "backup_label")
        with open(label_path, "r") as label_file:
            label_text = label_file.read()
        start_wal_segment, start_time = pgb.parse_backup_label(label_text)

        stored_meta = backups[0]['metadata']
        assert start_wal_segment == stored_meta['start-wal-segment']
        assert start_time == stored_meta['start-time']

        # for a standalone hot backup, the start wal file will be in the pg_xlog / pg_wal directory
        wal_dir = "pg_wal" if float(db.pgver) >= float("10.0") else "pg_xlog"
        wal_path = os.path.join(backup_out, wal_dir,
                                stored_meta['start-wal-segment'])
        wal_expected = active_backup_mode == "standalone_hot_backup"
        assert os.path.isfile(wal_path) is wal_expected
Example #25
0
    def _get_basebackup(self, pgdata, basebackup, site,
                        primary_conninfo=None,
                        recovery_end_command=None,
                        recovery_target_action=None,
                        recovery_target_name=None,
                        recovery_target_time=None,
                        recovery_target_xid=None,
                        restore_to_master=None,
                        overwrite=False,
                        tablespace_mapping=None):
        targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
        if sum(0 if flag is None else 1 for flag in targets) > 1:
            raise RestoreError("Specify at most one of recovery_target_name, "
                               "recovery_target_time or recovery_target_xid")

        # If basebackup that we want it set as latest, figure out which one it is
        if recovery_target_time:
            try:
                recovery_target_time = dates.parse_timestamp(recovery_target_time)
            except (TypeError, ValueError) as ex:
                raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
            basebackup = self._find_nearest_basebackup(recovery_target_time)
        elif basebackup == "latest":
            basebackup = self._find_nearest_basebackup()

        # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
        metadata = self.storage.get_basebackup_metadata(basebackup)
        tablespaces = {}

        # Make sure we have a proper place to write the $PGDATA and possible tablespaces
        dirs_to_create = []
        dirs_to_recheck = []
        dirs_to_wipe = []

        if not os.path.exists(pgdata):
            dirs_to_create.append(pgdata)
        elif overwrite:
            dirs_to_create.append(pgdata)
            dirs_to_wipe.append(pgdata)
        elif os.listdir(pgdata) in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            dirs_to_recheck.append(["$PGDATA", pgdata])
        else:
            raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                               .format(pgdata))

        if metadata.get("format") == "pghoard-bb-v2":
            # "Backup file" is a metadata object, fetch it to get more information
            bmeta_compressed = self.storage.get_file_bytes(basebackup)
            with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                       key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
                bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
            self.log.debug("Backup metadata: %r", bmeta)

            tablespaces = bmeta["tablespaces"]
            basebackup_data_files = [
                [
                    os.path.join(self.config["path_prefix"], site, "basebackup_chunk", chunk["chunk_filename"]),
                    chunk["result_size"],
                ]
                for chunk in bmeta["chunks"]
            ]
            # We need the files from the main basebackup file too
            basebackup_data_files.append([(io.BytesIO(bmeta_compressed), metadata), 0])

        elif metadata.get("format") == "pghoard-bb-v1":
            # Tablespace information stored in object store metadata, look it up
            tsmetare = re.compile("^tablespace-name-([0-9]+)$")
            for kw, value in metadata.items():
                match = tsmetare.match(kw)
                if not match:
                    continue
                tsoid = match.group(1)
                tsname = value
                tspath = metadata["tablespace-path-{}".format(tsoid)]
                tablespaces[tsname] = {
                    "oid": int(tsoid),
                    "path": tspath,
                }

            basebackup_data_files = [[basebackup, -1]]

        else:
            # Object is a raw (encrypted, compressed) basebackup
            basebackup_data_files = [[basebackup, -1]]

        # Map tablespaces as requested and make sure the directories exist
        for tsname, tsinfo in tablespaces.items():
            tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
            if not os.path.exists(tspath):
                raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                                   .format(tsname, tspath))
            if os.listdir(tspath) not in ([], ["lost+found"]):
                # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
                raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                                   .format(tsname, tspath))

            tsinfo["path"] = tspath
            print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
            dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

        # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
        # user probably made a typo with tablespace names, abort in that case.
        if tablespace_mapping:
            raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                               .format(sorted(tablespace_mapping)))

        # First check that the existing (empty) directories are writable, then possibly wipe any directories as
        # requested by --overwrite and finally create the new dirs
        for diruse, dirname in dirs_to_recheck:
            try:
                tempfile.TemporaryFile(dir=dirname).close()
            except PermissionError:
                raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                                   .format(diruse, dirname))

        for dirname in dirs_to_wipe:
            shutil.rmtree(dirname)
        for dirname in dirs_to_create:
            os.makedirs(dirname)
            os.chmod(dirname, 0o700)

        total_download_size = sum(item[1] for item in basebackup_data_files)
        progress_report_time = [0]
        download_errors = 0
        extract_errors = 0

        with futures.ThreadPoolExecutor(max_workers=self.config["compression"]["thread_count"]) as extract_executor:
            extract_jobs = []
            with futures.ThreadPoolExecutor(max_workers=self.config["transfer"]["thread_count"]) as download_executor:
                download_jobs = []
                download_progress_per_file = {
                    basebackup_data_file: 0
                    for basebackup_data_file, _ in basebackup_data_files
                    if not isinstance(basebackup_data_file, tuple)
                }

                def download_progress(end=""):
                    # report max once per second
                    if time.monotonic() - progress_report_time[0] < 1:
                        return
                    progress_report_time[0] = time.monotonic()

                    total_downloaded = sum(download_progress_per_file.values())
                    if total_download_size <= 0:
                        progress = 0
                    else:
                        progress = total_downloaded / total_download_size
                    print("\rDownload progress: {progress:.2%} ({dl_mib:.0f} / {total_mib:.0f} MiB)\r".format(
                        progress=progress,
                        dl_mib=total_downloaded / (1024 ** 2),
                        total_mib=total_download_size / (1024 ** 2),
                    ), end=end)

                for basebackup_data_file, backup_data_file_size in basebackup_data_files:
                    if isinstance(basebackup_data_file, tuple):
                        tmp_obj, tmp_metadata = basebackup_data_file
                        extract_jobs.append(extract_executor.submit(
                            self.extract_one_backup,
                            obj=tmp_obj,
                            metadata=tmp_metadata,
                            pgdata=pgdata,
                            site=site,
                            tablespaces=tablespaces,
                        ))
                        continue

                    def single_download_progress(current_pos, expected_max,
                                                 this_file_name=basebackup_data_file,
                                                 this_file_size=backup_data_file_size):
                        download_progress_per_file[this_file_name] = this_file_size * (current_pos / expected_max)
                        download_progress()

                    # NOTE: Most of the transfer clients aren't thread-safe, so initialize a new transfer
                    # client for each download.  We could use thread local storage or pooling here, but
                    # probably not worth the trouble for this use case.
                    transfer = get_transfer(common.get_object_storage_config(self.config, site))
                    download_jobs.append(download_executor.submit(
                        self.download_one_backup,
                        basebackup_data_file=basebackup_data_file,
                        progress_callback=single_download_progress,
                        site=site,
                        transfer=transfer,
                    ))

                for future in futures.as_completed(download_jobs):
                    if future.exception():
                        self.log.error("Got error from chunk download: %s", future.exception())
                        download_errors += 1
                        continue

                    tmp_obj, tmp_metadata = future.result()
                    extract_jobs.append(extract_executor.submit(
                        self.extract_one_backup,
                        obj=tmp_obj,
                        metadata=tmp_metadata,
                        pgdata=pgdata,
                        site=site,
                        tablespaces=tablespaces,
                    ))

                progress_report_time[0] = 0
                download_progress(end="\n")

            for future in futures.as_completed(extract_jobs):
                if future.exception():
                    self.log.error("Got error from chunk extraction: %s", future.exception())
                    extract_errors += 1
                    continue

        if download_errors:
            raise RestoreError("Backup download failed with {} errors".format(download_errors))
        if extract_errors:
            raise RestoreError("Backup extraction failed with {} errors".format(extract_errors))

        create_recovery_conf(
            dirpath=pgdata,
            site=site,
            port=self.config["http_port"],
            primary_conninfo=primary_conninfo,
            recovery_end_command=recovery_end_command,
            recovery_target_action=recovery_target_action,
            recovery_target_name=recovery_target_name,
            recovery_target_time=recovery_target_time,
            recovery_target_xid=recovery_target_xid,
            restore_to_master=restore_to_master,
        )

        print("Basebackup restoration complete.")
        print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
        print("On systemd based systems you can run systemctl start postgresql")
        print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
Example #26
0
 def _create_transfer(self):
     """Return a transfer object for this instance's site.

     The object storage configuration for ``self.site`` is looked up in
     ``self.config`` with ``common.get_object_storage_config`` and handed
     straight to ``get_transfer``; whatever that call returns is returned.
     """
     return get_transfer(
         common.get_object_storage_config(self.config, self.site))