Example #1
def update_recovery_conf(follow):
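    # Regenerate the streaming replication configuration so this unit follows
    # the given peer: recovery.conf before PostgreSQL 12, or hot_standby.conf
    # plus a standby signal file on 12 and later.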
    assert follow != hookenv.local_unit()

    peer_rel = helpers.get_peer_relation()
    follow_relinfo = peer_rel.get(follow)
    assert follow_relinfo is not None, "Invalid upstream {}".format(follow)

    current_follow = get_following()

    if follow != current_follow:
        status_set("maintenance", "Following new unit {}".format(follow))
        set_following(follow)
        # Setting the state to defer publication until after restart.
        reactive.set_state("postgresql.replication.publish_following")

    else:
        # Even though the master is unchanged, we still regenerate
        # recovery.conf in case connection details such as IP addresses
        # have changed.
        hookenv.log("Continuing to follow {}".format(follow))

    pg12 = postgresql.has_version("12")
    if pg12:
        path = postgresql.hot_standby_conf_path()
        template = "hot_standby.conf.tmpl"
    else:
        path = postgresql.recovery_conf_path()
        template = "recovery.conf.tmpl"

    config = hookenv.config()

    data = dict(
        streaming_replication=config["streaming_replication"],
        host=follow_relinfo["host"],
        port=follow_relinfo["port"],
        user=replication_username(),
        password=leader_get("replication_password"),
    )

    if reactive.helpers.is_state("postgresql.wal_e.enabled"):
        data["restore_command"] = wal_e.wal_e_restore_command()

    templating.render(template,
                      path,
                      data,
                      owner="postgres",
                      group="postgres",
                      perms=0o600)

    if pg12:
        touch(postgresql.hot_standby_signal_path())

    # Use @when_file_changed for this when Issue #44 is resolved.
    if reactive.helpers.any_file_changed([path]):
        reactive.set_state("postgresql.cluster.needs_restart")
        if reactive.is_state("postgresql.replication.cloned"):
            reactive.set_state("postgresql.replication.check_following")
Example #2
def check_following():
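    # Confirm that streaming replication from the unit we are following is
    # active, clearing the check state once it is confirmed.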
    peer_rel = helpers.get_peer_relation()
    following = get_following()
    if peer_rel is None or following is None:
        reactive.remove_state("postgresql.replication.check_following")
        return
    if postgresql.is_replicating(following, user=replication_username()):
        hookenv.log("Replication of {} is confirmed".format(following))
        reactive.remove_state("postgresql.replication.check_following")
    else:
        status_set("blocked", "Replication of {} has failed".format(following))
Example #3
def connect(user="postgres", database="postgres", unit=None):
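    # Open a psycopg2 connection to the local unit, or to the named peer
    # using the connection details it has published on the peer relation.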
    if unit is None or unit == hookenv.local_unit():
        host = None
        port_ = port()
    else:
        relinfo = helpers.get_peer_relation()[unit]
        if "host" not in relinfo or "port" not in relinfo:
            raise InvalidConnection("{} has not published connection details".format(unit))
        host = relinfo["host"]
        port_ = relinfo["port"]
    return psycopg2.connect(user=user, database=database, host=host, port=port_)
Example #4
def drain_master_and_promote_anointed():
    # Wait until this anointed unit is fully in-sync with the
    # master, and then promote it to master. But first we
    # need to ensure that the master is following us, and that we
    # have no outstanding requests on the restart lock to avoid deadlocking
    # the cluster.
    peer_rel = helpers.get_peer_relation()
    master = get_master()
    if peer_rel is None or master is None:
        return  # Peers all gone? Other handlers will promote.

    if peer_rel[master].get("following") != hookenv.local_unit():
        status_set("waiting",
                   "Waiting for master to follow me, its anointed successor")
        return  # Try again next hook

    # Drain the master
    while True:
        local_offset = postgresql.wal_received_offset(postgresql.connect())
        if local_offset is None:
            # Huh? Should not happen unless the server was unexpectedly
            # restarted.
            break

        try:
            remote_con = postgresql.connect(user=replication_username(),
                                            unit=master)
            remote_offset = postgresql.wal_received_offset(remote_con)
            if remote_offset is None:
                # Huh? Should not happen either, since the master published
                # that it is following us.
                break
        except (psycopg2.Error, postgresql.InvalidConnection) as x:
            status_set(
                "waiting",
                "Waiting to query replication state of {}: {}"
                "".format(master, x),
            )
            time.sleep(1)
            continue

        if local_offset >= remote_offset:
            break  # In sync. Proceed to promotion.

        status_set(
            "waiting",
            "{} bytes to sync before promotion"
            "".format(remote_offset - local_offset),
        )
        time.sleep(1)

    # Promote the anointed to master
    promote_to_master()
    switchover_status()
Example #5
def authorized_by(unit):
    # Ensure that this unit is listed as authorized by the given unit,
    # and the given unit has provided connection details. The check for
    # connection details is needed in case we are upgrading from an
    # older charm and the remote unit has not yet run its upgrade-charm
    # hook and provided the necessary information.
    peer = helpers.get_peer_relation()
    if peer is None or unit not in peer:
        return False
    authorized = set(peer[unit].get("allowed-units", "").split())
    return "host" in peer[unit] and hookenv.local_unit() in authorized
Example #6
def publish_following():
    # Advertise the unit we are following, in the hook where we actually
    # restart and the change actually takes effect. This pings any
    # anointed master during switchover, allowing it to proceed onto
    # the promotion step.
    peer_rel = helpers.get_peer_relation()
    following = get_following()
    if peer_rel is not None:
        peer_rel.local["following"] = following
        reactive.remove_state("postgresql.replication.publish_following")
    if following is None:
        reactive.remove_state("postgresql.replication.check_following")
    if reactive.is_state("postgresql.replication.switchover"):
        switchover_status()
Example #7
def coordinate_failover():
    """The master has been destroyed. Trigger the failover process."""
    master = get_master()
    rel = helpers.get_peer_relation()

    hookenv.log("Master {} is gone".format(master), WARNING)

    # Per Bug #1417874, the master doesn't know it is dying until it
    # is too late, and standbys learn about their master dying at
    # different times. We need to wait until all remaining units
    # are aware that the master is gone, and are no longer following
    # it. If we fail to do this step, then we risk appointing a new
    # master while some units are still replicating data from
    # the ex-master and we will end up with diverged timelines.
    # Unfortunately, this means failover will not complete until
    # hooks can be run on all remaining units, which could be several
    # hours if maintenance operations are in progress. Once
    # Bug #1417874 is addressed, the departing master
    # can cut off replication to all units simultaneously and we
    # can skip this step and allow failover to occur as soon as the
    # leader learns that the master is gone. Or can we? A network
    # partition could stop the controller seeing the master, and
    # any about-to-depart hooks will not be triggered, with the same
    # problem detailed above. pg_rewind and repmgr may also offer
    # alternatives, repairing the diverged timeline rather than
    # avoiding it. But pg_rewind only copes with timeline switches
    # in PG9.6+, which means we can't promote, which risks wal shipping
    # collisions between the old and new masters.
    waiting_on = set()
    for unit, relinfo in rel.items():
        if relinfo.get("following"):
            hookenv.log("Waiting for {} to stop replicating ex-master"
                        "".format(unit))
            waiting_on.add(unit)
    if rel.local.get("following"):
        # following from the relation, rather than get_following(),
        # to ensure that the change has been applied.
        hookenv.log("Waiting for me to stop replicating ex-master")
        waiting_on.add(hookenv.local_unit())
    if not waiting_on:
        new_master = elect_master()
        hookenv.log("Failing over to new master {}".format(new_master),
                    WARNING)
        set_master(new_master)
    else:
        status_set(
            None,
            "Coordinating failover. Waiting on {}"
            "".format(",".join(sorted(waiting_on))),
        )
Example #8
def get_anointed():
    """The unit anointed to become master in switchover (not failover)"""
    if reactive.is_state("postgresql.replication.manual"):
        return None
    anointed = leader_get("anointed_master")
    if anointed == hookenv.local_unit():
        return anointed
    peer_rel = helpers.get_peer_relation()
    if peer_rel and anointed in peer_rel:
        return anointed
    # If this unit is being torn down, there is the perverse
    # case where the anointed master is no longer in the
    # peer relation. This probably will never happen outside
    # of test suites.
    return None
Example #9
def switchover_status():
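    # Report switchover progress in the unit's workload status.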
    update_replication_states()
    anointed = get_anointed()

    # From the peer relation, to match what is published after restart.
    # unitdata copy is set before restart.
    peer_rel = helpers.get_peer_relation()
    following = peer_rel.local.get("following")

    mode = "Primary" if reactive.is_state(
        "postgresql.replication.is_primary") else "Secondary"

    hookenv.status_set(
        "maintenance",
        "Switchover to {}. {} following {}"
        "".format(anointed, mode, str(following)),
    )
Example #10
def elect_master():
    """Elect a new master after the old one has departed.

    The new master is the secondary that has replayed the most
    WAL data. There must be no hot standbys still replicating
    data from the previous master, or we may end up with diverged
    timelines.

    Note we check replayed wal instead of received wal, because the
    servers have just been restarted with no master and the information
    about received wal has been lost.
    """
    rel = helpers.get_peer_relation()
    local_unit = hookenv.local_unit()

    # The unit with the most advanced WAL offset should be the new master.
    if postgresql.is_running():
        local_offset = postgresql.wal_replay_offset(postgresql.connect())
        offsets = [(local_offset, local_unit)]
    else:
        offsets = []

    for unit, relinfo in rel.items():
        try:
            con = postgresql.connect(user=replication_username(), unit=unit)
            offsets.append((postgresql.wal_replay_offset(con), unit))
        except (psycopg2.Error, postgresql.InvalidConnection) as x:
            hookenv.log(
                "Unable to query replication state of {}: {}"
                "".format(unit, x),
                WARNING,
            )
            # TODO: Signal re-cloning required. Or autodetect
            # based on timeline switch. Or PG9.3+ could use pg_rewind.

    offsets.sort(reverse=True)  # Most-advanced replay offset first.
    if not offsets:
        # This should only happen if we failover before replication has
        # been setup, like a test suite destroying units without waiting
        # for the initial deployment to complete.
        status_set("blocked", "No candidates for master found!")
        raise SystemExit(0)
    elected_master = offsets[0][1]
    return elected_master
Example #11
def check_switchover_complete():
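    # Once the anointed unit reports it is no longer following anyone,
    # finalize the switchover by recording it as the new master.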
    peer_rel = helpers.get_peer_relation()
    anointed = get_anointed()

    if anointed is None:
        # switchover target is gone. Hopefully the service
        # is being torn down, because this otherwise shouldn't happen.
        # Reverting to the existing master should work.
        leader_set(anointed_master=None)
        update_replication_states()
        return

    if anointed == hookenv.local_unit():
        anointed_relinfo = peer_rel.local
    else:
        anointed_relinfo = peer_rel[anointed]
    if anointed_relinfo.get("following") is None:
        leader_set(master=anointed, anointed_master=None)
        hookenv.log("Switchover to {} complete".format(anointed))
        update_replication_states()
    else:
        hookenv.log("Switchover to {} continues".format(anointed))

    switchover_status()
Example #12
def switchover_action():
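    # Handle the 'switchover' action by anointing the requested unit as the
    # new master; the switchover handlers then drive the actual promotion.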
    try:
        params = hookenv.action_get()
        anointed = params["master"]
        master = get_master()

        if not master:
            hookenv.action_fail("There is no master. Cannot switchover")
            return

        if not anointed:
            hookenv.action_fail("anointed master was not specified")
            return

        if master == anointed:
            hookenv.action_set(
                dict(result="{} is already master"
                     "".format(anointed)))
            return

        peer_rel = helpers.get_peer_relation()
        if anointed != hookenv.local_unit() and anointed not in peer_rel:
            hookenv.action_fail("Invalid unit name {}".format(anointed))
            return

        leader_set(anointed_master=anointed)
        update_replication_states()

        switchover_status()

        hookenv.action_set(
            dict(result="Initiated switchover of master to {}"
                 "".format(anointed)))

    finally:
        reactive.remove_state("action.switchover")
Example #13
def ensure_viable_postgresql_conf(opts):
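    # Force minimum values for settings that replication, backups and client
    # connections depend on.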
    def force(**kw):
        for k, v in kw.items():
            if opts.get(k) != v:
                hookenv.log("Setting {} to {}".format(k, v), DEBUG)
                opts[k] = v

    config = hookenv.config()
    rels = context.Relations()

    # Number of standby units - count peers and 'master' relations.
    num_standbys = len(helpers.get_peer_relation() or {})
    for rel in rels["master"].values():
        num_standbys += len(rel)

    num_clients = 0
    for rel in list(rels["db"]) + list(rels["db-admin"]):
        num_clients += len(rel)

    # Even without replication, replication slots get used by
    # pg_basebackup(1). Bump up max_wal_senders so things work. It is
    # cheap, so perhaps we should just pump it to several thousand.
    min_wal_senders = num_standbys * 2 + 5
    if min_wal_senders > int(opts.get("max_wal_senders", 0)):
        force(max_wal_senders=min_wal_senders)

    # We used to calculate a minimum max_connections here, ensuring
    # that we had at least one per client and enough for replication
    # and backups. It wasn't much use though, as the major variable
    # is not the number of clients but how many connections the
    # clients open (connection pools of 20 or more are not uncommon).
    # lp:1594667 required the calculation to be synchronized, or just
    # removed. So removed to avoid complexity for dubious gains.
    #
    # max_connections. One per client unit, plus replication.
    # max_wal_senders = int(opts.get('max_wal_senders', 0))
    # assert max_wal_senders > 0
    # min_max_connections = max_wal_senders + max(1, num_clients)
    #
    min_max_connections = 100
    if min_max_connections > int(opts.get("max_connections", 0)):
        force(max_connections=min_max_connections)

    # We want 'hot_standby' at a minimum, as it lets us run
    # pg_basebackup() and it is recommended over the more
    # minimal 'archive'. Is it worth enabling the higher-still
    # 'logical' level only when necessary? How do we detect that?
    force(hot_standby=True)
    if postgresql.has_version("9.4"):
        force(wal_level="logical")
    else:
        force(wal_level="hot_standby")

    # Having two config options for the one setting is confusing. Perhaps
    # we should deprecate this.
    if num_standbys and (int(config["replicated_wal_keep_segments"]) > int(opts.get("wal_keep_segments", 0))):
        force(wal_keep_segments=config["replicated_wal_keep_segments"])

    # Log shipping with WAL-E.
    if config["wal_e_storage_uri"]:
        force(archive_mode="on")  # Boolean pre-9.5, enum 9.5+
        force(archive_command=wal_e.wal_e_archive_command())

    # Log destinations for syslog. This charm only supports standard
    # Debian logging, or Debian + syslog. This will grow more complex in
    # the future, as the local logs are redundant if you are using syslog
    # for log aggregation, and we will want to add csvlog because it is
    # so much easier to parse.
    if context.Relations()["syslog"]:
        force(
            log_destination="stderr,syslog",
            syslog_ident=hookenv.local_unit().replace("/", "_"),
        )
Example #14
def generate_pg_hba_conf(pg_hba, config, rels, _peer_rel=None):
    """Update the pg_hba.conf file (host based authentication)."""
    rules = []  # The ordered list, as tuples.

    # local      database  user  auth-method  [auth-options]
    # host       database  user  address  auth-method  [auth-options]
    # hostssl    database  user  address  auth-method  [auth-options]
    # hostnossl  database  user  address  auth-method  [auth-options]
    # host       database  user  IP-address  IP-mask  auth-method  [auth-opts]
    # hostssl    database  user  IP-address  IP-mask  auth-method  [auth-opts]
    # hostnossl  database  user  IP-address  IP-mask  auth-method  [auth-opts]
    def add(*record):
        rules.append(tuple(record))

    # The charm is running as the root user, and needs to be able to
    # connect as the postgres user to all databases.
    add("local", "all", "postgres", "peer", "map=juju_charm")

    # The local unit needs access to its own database. Let every local
    # user connect to their matching PostgreSQL user, if it exists, and
    # nagios with a password.
    add("local", "all", nagios.nagios_username(), "password")
    add("local", "all", "all", "peer")

    if _peer_rel is None:
        _peer_rel = helpers.get_peer_relation()

    # Peers need replication access as the charm replication user.
    if _peer_rel:
        for peer, relinfo in _peer_rel.items():
            for addr in incoming_addresses(relinfo):
                qaddr = postgresql.quote_identifier(addr)
                # Magic replication database, for replication.
                add(
                    "host",
                    "replication",
                    replication.replication_username(),
                    qaddr,
                    "md5",
                    "# {}".format(relinfo),
                )
                # postgres db, so leader can query replication status.
                add(
                    "host",
                    "postgres",
                    replication.replication_username(),
                    qaddr,
                    "md5",
                    "# {}".format(relinfo),
                )

    # Clients need access to the relation database as the relation users.
    for rel in rels["db"].values():
        if "user" in rel.local:
            for relinfo in rel.values():
                for addr in incoming_addresses(relinfo):
                    # Quote everything, including the address, to disenchant
                    # magic tokens like 'all'.
                    add(
                        "host",
                        postgresql.quote_identifier(rel.local["database"]),
                        postgresql.quote_identifier(rel.local["user"]),
                        postgresql.quote_identifier(addr),
                        "md5",
                        "# {}".format(relinfo),
                    )
                    add(
                        "host",
                        postgresql.quote_identifier(rel.local["database"]),
                        postgresql.quote_identifier(rel.local["schema_user"]),
                        postgresql.quote_identifier(addr),
                        "md5",
                        "# {}".format(relinfo),
                    )

    # Admin clients need access to all databases as any user, not just the
    # relation user. Most clients will just use the user provided them,
    # but proxies such as pgbouncer need to open connections as the accounts
    # it creates.
    for rel in rels["db-admin"].values():
        if "user" in rel.local:
            for relinfo in rel.values():
                for addr in incoming_addresses(relinfo):
                    add(
                        "host",
                        "all",
                        "all",
                        postgresql.quote_identifier(addr),
                        "md5",
                        "# {}".format(relinfo),
                    )

    # External replication connections. Somewhat different than before
    # as the relation gets its own user to avoid sharing credentials,
    # and logical replication connections will want to specify the
    # database name.
    for rel in rels["master"].values():
        for relinfo in rel.values():
            for addr in incoming_addresses(relinfo):
                add(
                    "host",
                    "replication",
                    postgresql.quote_identifier(rel.local["user"]),
                    postgresql.quote_identifier(addr),
                    "md5",
                    "# {}".format(relinfo),
                )
                if "database" in rel.local:
                    add(
                        "host",
                        postgresql.quote_identifier(rel.local["database"]),
                        postgresql.quote_identifier(rel.local["user"]),
                        postgresql.quote_identifier(addr),
                        "md5",
                        "# {}".format(relinfo),
                    )

    # External administrative addresses, if specified by the operator.
    for addr in config["admin_addresses"].split(","):
        if addr:
            add(
                "host",
                "all",
                "all",
                postgresql.quote_identifier(postgresql.addr_to_range(addr)),
                "md5",
                "# admin_addresses config",
            )

    # And anything-goes rules, if specified by the operator.
    for line in helpers.split_extra_pg_auth(config["extra_pg_auth"]):
        add(line + " # extra_pg_auth config")

    # Deny everything else
    add("local", "all", "all", "reject", "# Refuse by default")
    add("host", "all", "all", "all", "reject", "# Refuse by default")

    # Strip out the existing juju managed section
    start_mark = "### BEGIN JUJU SETTINGS ###"
    end_mark = "### END JUJU SETTINGS ###"
    pg_hba = re.sub(
        r"^\s*{}.*^\s*{}\s*$".format(re.escape(start_mark), re.escape(end_mark)),
        "",
        pg_hba,
        flags=re.I | re.M | re.DOTALL,
    )

    # Comment out any uncommented lines
    pg_hba = re.sub(r"^\s*([^#\s].*)$", r"# juju # \1", pg_hba, flags=re.M)

    # Spit out the updated file
    rules.insert(0, (start_mark,))
    rules.append((end_mark,))
    pg_hba += "\n" + "\n".join(" ".join(rule) for rule in rules)
    return pg_hba
Example #15
def upgrade_charm():
    workloadstatus.status_set("maintenance", "Upgrading charm")

    rels = context.Relations()

    # The master is now appointed by the leader.
    if hookenv.is_leader():
        master = replication.get_master()
        if not master:
            master = hookenv.local_unit()
            peer_rel = helpers.get_peer_relation()
            if peer_rel:
                for peer_relinfo in peer_rel.values():
                    if peer_relinfo.get("state") == "master":
                        master = peer_relinfo.unit
                        break
            hookenv.log("Discovered {} is the master".format(master))
            leadership.leader_set(master=master)

    # The name of this crontab has changed. It will get regenerated.
    if os.path.exists("/etc/cron.d/postgresql"):
        hookenv.log("Removing old crontab")
        os.unlink("/etc/cron.d/postgresql")

    # Older usernames were generated from the relation id,
    # and really old ones contained random components. This made it
    # problematic to restore a database into a fresh environment,
    # because the new usernames would not match the old usernames and
    # none of the database permissions would match. We now generate
    # usernames using just the client service name, so restoring a
    # database into a fresh environment will work provided the service
    # names match. We want to update the old usernames in upgraded
    # services to the new format to improve their disaster recovery
    # story.
    for relname, superuser in [("db", False), ("db-admin", True)]:
        for client_rel in rels[relname].values():
            hookenv.log("Migrating database users for {}".format(client_rel))
            password = client_rel.local.get("password", host.pwgen())
            old_username = client_rel.local.get("user")
            new_username = postgresql.username(client_rel.service, superuser,
                                               False)
            if old_username and old_username != new_username:
                migrate_user(old_username, new_username, password, superuser)
                client_rel.local["user"] = new_username
                client_rel.local["password"] = password

            old_username = client_rel.local.get("schema_user")
            if old_username and old_username != new_username:
                migrate_user(old_username, new_username, password, superuser)
                client_rel.local["schema_user"] = new_username
                client_rel.local["schema_password"] = password

    # Admin relations used to get 'all' published as the database name,
    # which was bogus.
    for client_rel in rels["db-admin"].values():
        if client_rel.local.get("database") == "all":
            client_rel.local["database"] = client_rel.service

    # Reconfigure PostgreSQL and republish client relations.
    reactive.remove_state("postgresql.cluster.configured")
    reactive.remove_state("postgresql.client.published")

    # Don't recreate the cluster.
    reactive.set_state("postgresql.cluster.created")

    # Set the postgresql.replication.cloned flag, so we don't rebuild
    # standbys when upgrading the charm from a pre-reactive version.
    reactive.set_state("postgresql.replication.cloned")

    # Publish which node we are following
    peer_rel = helpers.get_peer_relation()
    if peer_rel and "following" not in peer_rel.local:
        following = unitdata.kv().get("postgresql.replication.following")
        if following is None and not replication.is_master():
            following = replication.get_master()
        peer_rel.local["following"] = following

    # Ensure storage that was attached but ignored is no longer ignored.
    if not reactive.is_state("postgresql.storage.pgdata.attached"):
        if hookenv.storage_list("pgdata"):
            storage.attach()

    # Ensure client usernames and passwords match leader settings.
    for relname in ("db", "db-admin"):
        for rel in rels[relname].values():
            del rel.local["user"]
            del rel.local["password"]

    # Ensure the configured version is cached.
    postgresql.version()

    # Skip checks for pre-existing databases, as that has already happened.
    reactive.set_state("postgresql.cluster.initial-check")

    # Reinstall support scripts
    reactive.remove_state("postgresql.cluster.support-scripts")

    # Ensure that systemd is managing the PostgreSQL process
    if host.init_is_systemd() and not reactive.is_flag_set("postgresql.upgrade.systemd"):
        reactive.set_flag("postgresql.upgrade.systemd")
        if reactive.is_flag_set("postgresql.cluster.is_running"):
            hookenv.log("Restarting PostgreSQL under systemd", hookenv.WARNING)
            reactive.clear_flag("postgresql.cluster.is_running")
            postgresql.stop_pgctlcluster()

    # Update the PGDG source, in case the signing key has changed.
    config = hookenv.config()
    if config["pgdg"]:
        service.add_pgdg_source()
Example #16
def update_replication_states():
    """
    Set the following states appropriately:

        postgresql.replication.has_peers

            This unit has peers.

        postgresql.replication.had_peers

            This unit once had peers, but may not any more. The peer
            relation exists.

        postgresql.replication.master.peered

            This unit is peered with the master. It is not the master.

        postgresql.replication.master.authorized

            This unit is peered with and authorized by the master. It is
            not the master.

        postgresql.replication.is_master

            This unit is the master.

        postgresql.replication.has_master

            This unit is the master, or it is peered with and
            authorized by the master.

        postgresql.replication.cloned

            This unit is on the master's timeline. It has been cloned from
            the master, or is the master. Undefined with manual replication.

        postgresql.replication.manual

            Manual replication mode has been selected and the charm
            must not do any replication setup or maintenance.

        postgresql.replication.is_primary

            The unit is writable. It is either the master or manual
            replication mode is in effect.

        postgresql.replication.switchover

            In switchover to a new master. A switchover is a controlled
            failover, where the existing master is available.

        postgresql.replication.is_anointed

            In switchover and this unit is anointed to be the new master.
    """
    peers = helpers.get_peer_relation()
    reactive.toggle_state("postgresql.replication.has_peers", peers)
    if peers:
        reactive.set_state("postgresql.replication.had_peers")

    reactive.toggle_state("postgresql.replication.manual",
                          hookenv.config()["manual_replication"])

    master = get_master()  # None if postgresql.replication.manual state.
    reactive.toggle_state("postgresql.replication.is_master",
                          master == hookenv.local_unit())
    reactive.toggle_state("postgresql.replication.master.peered", peers
                          and master in peers)
    reactive.toggle_state(
        "postgresql.replication.master.authorized",
        peers and master in peers and authorized_by(master),
    )
    ready = reactive.is_state(
        "postgresql.replication.is_master") or reactive.is_state(
            "postgresql.replication.master.authorized")
    reactive.toggle_state("postgresql.replication.has_master", ready)

    anointed = get_anointed()
    reactive.toggle_state("postgresql.replication.switchover",
                          anointed is not None and anointed != master)
    reactive.toggle_state(
        "postgresql.replication.is_anointed",
        anointed is not None and anointed != master
        and anointed == hookenv.local_unit(),
    )

    reactive.toggle_state("postgresql.replication.is_primary",
                          postgresql.is_primary())

    if reactive.is_state("postgresql.replication.is_primary"):
        if reactive.is_state("postgresql.replication.is_master"):
            # If the unit is a primary and the master, it is on the master
            # timeline by definition and gets the 'cloned' state.
            reactive.set_state("postgresql.replication.cloned")
        elif reactive.is_state("postgresql.replication.is_anointed"):
            # The anointed unit retains its 'cloned' state.
            pass
        else:
            # If the unit is a primary and not the master, it is on a
            # diverged timeline and needs to lose the 'cloned' state.
            reactive.remove_state("postgresql.replication.cloned")

    cloned = reactive.is_state("postgresql.replication.cloned")
    reactive.toggle_state(
        "postgresql.replication.failover",
        master != hookenv.local_unit() and peers and cloned
        and (master not in peers),
    )
Example #17
def clone_master():
    master = get_master()
    peer_rel = helpers.get_peer_relation()
    master_relinfo = peer_rel[master]

    # Be paranoid since we are about to destroy data.
    assert not reactive.helpers.is_state("postgresql.replication.is_master")
    assert not reactive.helpers.is_state("postgresql.cluster.is_running")

    # We use realpath on data_dir as it may have been replaced with
    # a symbolic link, so we empty and recreate the actual directory
    # and the links remain in place.
    data_dir = os.path.realpath(postgresql.data_dir())

    if os.path.exists(data_dir):
        hookenv.log("Removing {} in preparation for clone".format(data_dir))
        shutil.rmtree(data_dir)
    helpers.makedirs(data_dir, mode=0o700, user="postgres", group="postgres")

    if postgresql.has_version("10"):
        wal_method = "--wal-method=stream"
    else:
        wal_method = "--xlog-method=stream"
    cmd = [
        "sudo",
        "-H",  # -H needed to locate $HOME/.pgpass
        "-u",
        "postgres",
        "pg_basebackup",
        "-D",
        postgresql.data_dir(),
        "-h",
        master_relinfo["host"],
        "-p",
        master_relinfo["port"],
        "--checkpoint=fast",
        "--progress",
        wal_method,
        "--no-password",
        "--username=_juju_repl",
    ]
    hookenv.log("Cloning {} with {}".format(master, " ".join(cmd)))
    status_set("maintenance", "Cloning {}".format(master))
    try:
        # Switch to a directory the postgres user can access.
        with helpers.switch_cwd("/tmp"):
            subprocess.check_call(cmd, universal_newlines=True)
    except subprocess.CalledProcessError as x:
        hookenv.log("Clone failed with {}".format(x), ERROR)
        # We failed, and the local cluster is broken.
        status_set("blocked", "Failed to clone {}".format(master))
        postgresql.drop_cluster()
        reactive.remove_state("postgresql.cluster.configured")
        reactive.remove_state("postgresql.cluster.created")
        # Terminate. We need this hook to exit, rather than enter a loop.
        raise SystemExit(0)

    update_recovery_conf(follow=master)

    reactive.set_state("postgresql.replication.cloned")
    update_replication_states()
Example #18
def publish_replication_details():
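    # Publish this unit's connection details and the peers authorized to
    # replicate from it on the peer relation.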
    peer = helpers.get_peer_relation()
    if peer is not None:
        peer.local["host"] = hookenv.unit_private_ip()
        peer.local["port"] = str(postgresql.port())
        peer.local["allowed-units"] = " ".join(sorted(peer.keys()))