Example #1
0
    def test_stop(self, version, service_stop, status_set, log):
        """postgresql.stop() stops the service, and blocks the unit on failure.

        ``version``, ``service_stop``, ``status_set`` and ``log`` are mocks
        supplied to the test; ``log`` is not inspected here.
        """
        version.return_value = '9.9'

        # Normal shutdown shuts down.
        service_stop.return_value = True
        postgresql.stop()
        # NOTE(review): the service name is reconstructed from the mocked
        # version ('9.9') and the 'main' cluster; confirm it against the
        # name postgresql.stop() actually passes to service_stop().
        service_stop.assert_called_once_with('postgresql@9.9-main')

        # Failed shutdown blocks and terminates
        service_stop.return_value = False
        with self.assertRaises(SystemExit) as x:
            postgresql.stop()
        status_set.assert_called_once_with('blocked', ANY)
        # The exit must be clean (code 0) even though the stop failed.
        self.assertEqual(x.exception.code, 0)  # Exit cleanly
Example #2
0
def detaching():
    """React to the pgdata storage being detached.

    If the database was never migrated onto the storage, only the recorded
    mount details and the 'attached' state are dropped. Otherwise the
    cluster is flagged as destroyed, the unit is blocked and PostgreSQL is
    stopped.
    """
    if not reactive.is_state('postgresql.storage.pgdata.migrated'):
        # Nothing lives on the storage; just forget about the mount.
        for kv_key in (pgdata_mount_key, pgdata_path_key):
            unitdata.kv().unset(kv_key)
        reactive.remove_state('postgresql.storage.pgdata.attached')
        return

    # No attempt is made to copy the data back to local storage: there is
    # probably not enough room for it, and the unit is most likely being
    # destroyed anyway, so it would be wasted effort even if it did fit.
    hookenv.status_set('blocked', 'Storage detached. Database destroyed.')
    reactive.set_state('postgresql.cluster.destroyed')
    stale_states = (
        'postgresql.cluster.created',
        'postgresql.cluster.configured',
        'postgresql.cluster.is_running',
    )
    for stale_state in stale_states:
        reactive.remove_state(stale_state)
    postgresql.stop()
    def test_stop(self, version, check_call, status_set, log):
        """Walk postgresql.stop() through its pg_ctlcluster outcomes.

        Covers a clean stop, a server that is already stopped, a 'fast'
        stop that fails and is retried as 'immediate', and the case where
        every attempt fails and the unit ends up blocked.
        """
        version.return_value = "9.9"

        def stop_argv(mode):
            # '-t' is needed so the command waits for shutdown to finish.
            # '-w' is not strictly needed (unlike for 'start'), but the
            # command is expected to pass it explicitly.
            return ["pg_ctlcluster", "--mode", mode, "9.9", "main", "stop", "--", "-w", "-t", "300"]

        fast_stop = call(stop_argv("fast"), universal_newlines=True)
        immediate_stop = call(stop_argv("immediate"), universal_newlines=True)

        # A normal shutdown issues a single 'fast' stop.
        postgresql.stop()
        check_call.assert_called_once_with(stop_argv("fast"), universal_newlines=True)

        # pg_ctlcluster(1) reports a server that is not running with
        # returncode 2; stop() still issues just the one 'fast' stop.
        check_call.side_effect = subprocess.CalledProcessError(2, "whoops")
        check_call.reset_mock()
        postgresql.stop()
        check_call.assert_called_once_with(stop_argv("fast"), universal_newlines=True)

        # When the 'fast' shutdown fails, an 'immediate' one is tried next.
        check_call.side_effect = iter([subprocess.CalledProcessError(42, "x"), None])
        check_call.reset_mock()
        postgresql.stop()
        check_call.assert_has_calls([fast_stop, immediate_stop])

        # When both attempts fail, the unit is blocked and we exit cleanly.
        check_call.side_effect = subprocess.CalledProcessError(42, "x")
        with self.assertRaises(SystemExit) as exit_ctx:
            postgresql.stop()
        status_set.assert_called_once_with("blocked", ANY)
        self.assertEqual(exit_ctx.exception.code, 0)  # Exit cleanly
Example #4
0
def stop():
    """Stop PostgreSQL and clear the 'postgresql.cluster.is_running' state.

    The unit status is set to 'maintenance' before the stop is attempted.
    The state is removed only after postgresql.stop() has returned.
    """
    status_set("maintenance", "Stopping PostgreSQL")
    postgresql.stop()
    reactive.remove_state("postgresql.cluster.is_running")
Example #5
0
def wal_e_restore():
    """Replace the local database with a backup fetched by WAL-E.

    Action parameters, read with ``hookenv.action_get()``:

    * ``backup-name``: the backup to fetch, or ``LATEST``. Hyphens are
      converted to underscores before the name is compared with the
      backups available at the source.
    * ``storage-uri``: where to restore from. It must differ from the
      ``wal_e_storage_uri`` config setting, otherwise the action fails.
    * ``confirm``: when false, an ``info`` message describing what would be
      destroyed is set on the action and nothing else happens.
    * ``target-timeline`` and the optional ``target-time``: written into
      recovery.conf.

    Once confirmed and validated, PostgreSQL is stopped, the contents of
    the data directory are deleted, the backup is fetched and recovery.conf
    is written. A master then deletes everything in the configured WAL-E
    storage (when one is configured), starts up and waits until the server
    has left recovery. A standby starts up, waits for the server to stop
    again and has its recovery.conf regenerated to follow the master.
    Finally several reactive states are reset so other handlers redo the
    remaining setup.
    """
    reactive.remove_state("action.wal-e-restore")
    params = hookenv.action_get()
    backup = params["backup-name"].strip().replace("-", "_")
    storage_uri = params["storage-uri"].strip()

    ship_uri = hookenv.config().get("wal_e_storage_uri")
    if storage_uri == ship_uri:
        hookenv.action_fail(
            "The storage-uri parameter is identical to "
            "the wal_e_storage_uri config setting. Your "
            "restoration source cannot be the same as the "
            "folder you are archiving too to avoid corrupting "
            "the backups."
        )
        return

    if not params["confirm"]:
        m = "Recovery from {}.".format(storage_uri)
        if ship_uri:
            m += "\nContents of {} will be destroyed.".format(ship_uri)
        m += "\nExisting local database will be destroyed."
        m += "\nRerun action with 'confirm=true' to proceed."
        hookenv.action_set({"info": m})
        return

    with tempfile.TemporaryDirectory(prefix="wal-e", suffix="envdir") as envdir:
        update_wal_e_env_dir(envdir, storage_uri)

        # Confirm there is a backup to restore
        backups = wal_e_list_backups(envdir)
        if not backups:
            hookenv.action_fail("No backups found at {}".format(storage_uri))
            return
        if backup != "LATEST" and backup not in (b["name"] for b in backups):
            hookenv.action_fail("Backup {} not found".format(backup))
            return

        # Shutdown PostgreSQL. Note we want this action to run synchronously,
        # so there is no opportunity to ask permission from the leader. If
        # there are other units cloning this database, those clone operations
        # will fail. Which seems preferable to blocking a recovery operation
        # in any case, because if we are doing disaster recovery we generally
        # want to do it right now.
        status_set("maintenance", "Stopping PostgreSQL for backup restoration")
        postgresql.stop()

        # Trash the existing database. Its dangerous to do this first, but
        # we probably need the space.
        data_dir = postgresql.data_dir()  # May be a symlink
        _clear_directory(data_dir)

        # WAL-E recover
        status_set("maintenance", "Restoring backup {}".format(backup))
        wal_e_run(["backup-fetch", data_dir, backup], envdir=envdir)

        # Create recovery.conf to complete recovery
        is_master = reactive.is_state("postgresql.replication.is_master")
        standby_mode = "off" if is_master else "on"
        if params.get("target-time"):
            target_time = "recovery_target_time='{}'".format(params["target-time"])
        else:
            target_time = ""
        target_action = "promote" if is_master else "shutdown"
        immediate = "" if is_master else "recovery_target='immediate'"
        helpers.write(
            postgresql.recovery_conf_path(),
            dedent(
                """\
                # Managed by Juju. PITR in progress.
                standby_mode = {}
                restore_command='{}'
                recovery_target_timeline = {}
                recovery_target_action = {}
                {}
                {}
                """
            ).format(
                standby_mode,
                wal_e_restore_command(envdir=envdir),
                params["target-timeline"],
                target_action,
                target_time,
                immediate,
            ),
            mode=0o600,
            # Owner matches the group. NOTE(review): confirm the server
            # account is 'postgres' on the targeted deployments.
            user="postgres",
            group="postgres",
        )

        # Avoid circular import. We could also avoid the import entirely
        # with a sufficiently complex set of handlers in the replication
        # module, but that seems to be a worse solution. Better to break
        # out this action into a separate module.
        from reactive.postgresql import replication

        if is_master:
            if ship_uri:
                # If master, trash the configured wal-e storage. This may
                # contain WAL and backups from the old cluster which will
                # conflict with the new cluster. Hopefully it does not
                # contain anything important, because we have no way to
                # prompt the user for confirmation.
                wal_e_run(["delete", "--confirm", "everything"])

            # Then, wait for recovery and promotion.
            postgresql.start()
            _wait_for_recovery_end()
        else:
            # If standby, startup and wait for recovery to complete and
            # shutdown.
            status_set("maintenance", "Recovery")
            # Startup might shutdown immediately and look like a failure.
            postgresql.start(ignore_failure=True)
            # No recovery point status yet for standbys, as we would need
            # to handle connection failures when the DB shuts down. We
            # should do this.
            while postgresql.is_running():
                time.sleep(5)
            replication.update_recovery_conf(follow=replication.get_master())

    # Reactive handlers will deal with the rest of the cleanup.
    # eg. ensuring required users and roles exist
    replication.update_replication_states()
    reactive.remove_state("postgresql.cluster.configured")
    reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
    reactive.remove_state("postgresql.nagios.user_ensured")
    reactive.remove_state("postgresql.replication.replication_user_created")
    reactive.remove_state("postgresql.client.published")


def _clear_directory(path):
    """Delete every entry inside *path*, leaving *path* itself in place."""
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        # Symlinks are unlinked rather than followed, even when they point
        # at a directory.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)


def _wait_for_recovery_end():
    """Poll the local server until it reports it is no longer in recovery.

    The replay location is published in the unit status every ten seconds
    while waiting. The connection is closed on the way out, whether the
    wait finishes normally or an error is raised.
    """
    # The replay-location function is chosen by server version.
    if postgresql.has_version("10"):
        query = """SELECT pg_is_in_recovery(),
                                          pg_last_wal_replay_lsn()"""
    else:
        query = """SELECT pg_is_in_recovery(),
                                          pg_last_xlog_replay_location()"""
    con = postgresql.connect()
    try:
        cur = con.cursor()
        while True:
            cur.execute(query)
            in_rec, loc = cur.fetchone()
            if not in_rec:
                break
            status_set("maintenance", "Recovery at {}".format(loc))
            time.sleep(10)
    finally:
        con.close()
Example #6
0
def stop():
    """Stop PostgreSQL and clear the 'postgresql.cluster.is_running' state.

    The unit status is set to 'maintenance' before the stop is attempted.
    The state is removed only after postgresql.stop() has returned.
    """
    status_set('maintenance', 'Stopping PostgreSQL')
    postgresql.stop()
    reactive.remove_state('postgresql.cluster.is_running')