def test_stop(self, version, service_stop, status_set, log):
    """A clean stop delegates to service_stop; a failed stop blocks the unit."""
    version.return_value = '9.9'

    # Happy path: the init-system service is stopped once.
    service_stop.return_value = True
    postgresql.stop()
    service_stop.assert_called_once_with('[email protected]')

    # Failure path: the unit is marked blocked and the hook exits cleanly.
    service_stop.return_value = False
    with self.assertRaises(SystemExit) as x:
        postgresql.stop()
    status_set.assert_called_once_with('blocked', ANY)
    self.assertEqual(x.exception.code, 0)  # Exit cleanly
def detaching():
    """Handle storage detachment for the PGDATA mount."""
    if not reactive.is_state('postgresql.storage.pgdata.migrated'):
        # Data never lived on this storage; just forget the bookkeeping.
        unitdata.kv().unset(pgdata_mount_key)
        unitdata.kv().unset(pgdata_path_key)
        reactive.remove_state('postgresql.storage.pgdata.attached')
        return

    # The database lives on the detached storage. We make no attempt to
    # migrate data back to local disk: there is probably not enough room,
    # and the unit is most likely being destroyed anyway, so it would be
    # a waste of time even if there were enough space.
    hookenv.status_set('blocked', 'Storage detached. Database destroyed.')
    reactive.set_state('postgresql.cluster.destroyed')
    reactive.remove_state('postgresql.cluster.created')
    reactive.remove_state('postgresql.cluster.configured')
    reactive.remove_state('postgresql.cluster.is_running')
    postgresql.stop()
def test_stop(self, version, check_call, status_set, log):
    """pg_ctlcluster stop: fast, not-running, fallback-to-immediate, blocked."""
    version.return_value = "9.9"

    def expected_cmd(mode):
        # -t option is required to wait for shutdown to complete. -w not
        # required unlike 'start', but lets be explicit.
        return ["pg_ctlcluster", "--mode", mode, "9.9", "main", "stop",
                "--", "-w", "-t", "300"]

    # Normal shutdown shuts down.
    postgresql.stop()
    check_call.assert_called_once_with(expected_cmd("fast"),
                                       universal_newlines=True)

    # If the server is not running, pg_ctlcluster(1) signals this with
    # returncode 2, which is swallowed.
    check_call.side_effect = subprocess.CalledProcessError(2, "whoops")
    check_call.reset_mock()
    postgresql.stop()
    check_call.assert_called_once_with(expected_cmd("fast"),
                                       universal_newlines=True)

    # If 'fast' shutdown fails, we retry with an 'immediate' shutdown.
    check_call.side_effect = iter([subprocess.CalledProcessError(42, "x"),
                                   None])
    check_call.reset_mock()
    postgresql.stop()
    check_call.assert_has_calls([
        call(expected_cmd("fast"), universal_newlines=True),
        call(expected_cmd("immediate"), universal_newlines=True),
    ])

    # If both fail, we block the unit and exit cleanly.
    check_call.side_effect = subprocess.CalledProcessError(42, "x")
    with self.assertRaises(SystemExit) as x:
        postgresql.stop()
    status_set.assert_called_once_with("blocked", ANY)
    self.assertEqual(x.exception.code, 0)  # Exit cleanly
def stop():
    """Shut the cluster down and drop the is_running reactive flag."""
    # Surface progress to the operator before the (possibly slow) stop.
    status_set("maintenance", "Stopping PostgreSQL")
    postgresql.stop()
    reactive.remove_state("postgresql.cluster.is_running")
def wal_e_restore():
    """Juju action: restore this unit's database from a WAL-E backup.

    Reads ``backup-name``, ``storage-uri``, ``confirm``, ``target-time``
    and ``target-timeline`` action parameters, destroys the local data
    directory, fetches the backup and drives PostgreSQL recovery to
    completion (promoting on the master, re-following on a standby).
    """
    reactive.remove_state("action.wal-e-restore")
    params = hookenv.action_get()
    # WAL-E stores backup names with underscores, so accept either form.
    backup = params["backup-name"].strip().replace("-", "_")
    storage_uri = params["storage-uri"].strip()
    ship_uri = hookenv.config().get("wal_e_storage_uri")

    # Refuse to restore from the same location we are archiving to.
    if storage_uri == ship_uri:
        hookenv.action_fail(
            "The storage-uri parameter is identical to "
            "the wal_e_storage_uri config setting. Your "
            "restoration source cannot be the same as the "
            "folder you are archiving too to avoid corrupting "
            "the backups."
        )
        return

    # Destructive operation: require an explicit confirm=true re-run.
    if not params["confirm"]:
        m = "Recovery from {}.".format(storage_uri)
        if ship_uri:
            m += "\nContents of {} will be destroyed.".format(ship_uri)
        m += "\nExisting local database will be destroyed."
        m += "\nRerun action with 'confirm=true' to proceed."
        hookenv.action_set({"info": m})
        return

    with tempfile.TemporaryDirectory(prefix="wal-e", suffix="envdir") as envdir:
        update_wal_e_env_dir(envdir, storage_uri)

        # Confirm there is a backup to restore
        backups = wal_e_list_backups(envdir)
        if not backups:
            hookenv.action_fail("No backups found at {}".format(storage_uri))
            return
        if backup != "LATEST" and backup not in (b["name"] for b in backups):
            hookenv.action_fail("Backup {} not found".format(backup))
            return

        # Shutdown PostgreSQL. Note we want this action to run synchronously,
        # so there is no opportunity to ask permission from the leader. If
        # there are other units cloning this database, those clone operations
        # will fail. Which seems preferable to blocking a recovery operation
        # in any case, because if we are doing disaster recovery we generally
        # want to do it right now.
        status_set("maintenance", "Stopping PostgreSQL for backup restoration")
        postgresql.stop()

        # Trash the existing database. It's dangerous to do this first, but
        # we probably need the space.
        data_dir = postgresql.data_dir()  # May be a symlink
        for content in os.listdir(data_dir):
            cpath = os.path.join(data_dir, content)
            if os.path.isdir(cpath) and not os.path.islink(cpath):
                shutil.rmtree(cpath)
            else:
                os.remove(cpath)

        # WAL-E recover
        status_set("maintenance", "Restoring backup {}".format(backup))
        wal_e_run(["backup-fetch", data_dir, backup], envdir=envdir)

        # Create recovery.conf to complete recovery
        is_master = reactive.is_state("postgresql.replication.is_master")
        standby_mode = "off" if is_master else "on"
        if params.get("target-time"):
            target_time = "recovery_target_time='{}'" "".format(params["target-time"])
        else:
            target_time = ""
        # Master promotes when recovery completes; a standby shuts down so
        # we can rewrite recovery.conf to follow the current master.
        target_action = "promote" if is_master else "shutdown"
        immediate = "" if is_master else "recovery_target='immediate'"
        helpers.write(
            postgresql.recovery_conf_path(),
            dedent(
                """\
                # Managed by Juju. PITR in progress.
                standby_mode = {}
                restore_command='{}'
                recovery_target_timeline = {}
                recovery_target_action = {}
                {}
                {}
                """
            ).format(
                standby_mode,
                wal_e_restore_command(envdir=envdir),
                params["target-timeline"],
                target_action,
                target_time,
                immediate,
            ),
            mode=0o600,
            user="******",
            group="postgres",
        )

        # Avoid circular import. We could also avoid the import entirely
        # with a sufficiently complex set of handlers in the replication
        # module, but that seems to be a worse solution. Better to break
        # out this action into a separate module.
        from reactive.postgresql import replication

        if is_master:
            if ship_uri:
                # If master, trash the configured wal-e storage. This may
                # contain WAL and backups from the old cluster which will
                # conflict with the new cluster. Hopefully it does not
                # contain anything important, because we have no way to
                # prompt the user for confirmation.
                wal_e_run(["delete", "--confirm", "everything"])
            # Then, wait for recovery and promotion.
            postgresql.start()
            con = postgresql.connect()
            cur = con.cursor()
            # Poll until pg_is_in_recovery() goes false (i.e. promoted).
            while True:
                if postgresql.has_version("10"):
                    # Function renamed in PostgreSQL 10 (xlog -> wal).
                    cur.execute(
                        """SELECT pg_is_in_recovery(), pg_last_wal_replay_lsn()"""
                    )
                else:
                    cur.execute(
                        """SELECT pg_is_in_recovery(), pg_last_xlog_replay_location()"""
                    )
                in_rec, loc = cur.fetchone()
                if not in_rec:
                    break
                status_set("maintenance", "Recovery at {}".format(loc))
                time.sleep(10)
        else:
            # If standby, startup and wait for recovery to complete and
            # shutdown.
            status_set("maintenance", "Recovery")
            # Startup might shutdown immediately and look like a failure.
            postgresql.start(ignore_failure=True)
            # No recovery point status yet for standbys, as we would need
            # to handle connection failures when the DB shuts down. We
            # should do this.
            while postgresql.is_running():
                time.sleep(5)
            replication.update_recovery_conf(follow=replication.get_master())

        # Reactive handlers will deal with the rest of the cleanup.
        # eg. ensuring required users and roles exist
        replication.update_replication_states()
        reactive.remove_state("postgresql.cluster.configured")
        reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
        reactive.remove_state("postgresql.nagios.user_ensured")
        reactive.remove_state("postgresql.replication.replication_user_created")
        reactive.remove_state("postgresql.client.published")
def stop():
    """Bring PostgreSQL down and clear the cluster running flag."""
    # Report maintenance status first so the operator sees why we pause.
    status_set('maintenance', 'Stopping PostgreSQL')
    postgresql.stop()
    reactive.remove_state('postgresql.cluster.is_running')