Example 1
1
def install_administrative_scripts():
    scripts_dir = helpers.scripts_dir()
    logs_dir = helpers.logs_dir()
    helpers.makedirs(scripts_dir, mode=0o755)

    # The database backup script. Most of this is redundant now.
    source = os.path.join(hookenv.charm_dir(), 'scripts', 'pgbackup.py')
    destination = os.path.join(scripts_dir, 'dump-pg-db')
    with open(source, 'r') as f:
        helpers.write(destination, f.read(), mode=0o755)

    backups_dir = helpers.backups_dir()
    helpers.makedirs(backups_dir, mode=0o750,
                     user='******', group='postgres')

    # Generate a wrapper that invokes the backup script for each
    # database.
    data = dict(logs_dir=logs_dir,
                scripts_dir=scripts_dir,
                # backups_dir probably should be deprecated in favour of
                # a juju storage mount.
                backups_dir=backups_dir)
    destination = os.path.join(helpers.scripts_dir(), 'pg_backup_job')
    templating.render('pg_backup_job.tmpl', destination, data,
                      owner='root', group='postgres', perms=0o755)

    # Install the reaper scripts.
    script = 'pgkillidle.py'
    source = os.path.join(hookenv.charm_dir(), 'scripts', script)
    destination = os.path.join(scripts_dir, script)
    if (reactive.helpers.any_file_changed([source]) or
            not os.path.exists(destination)):
        with open(source, 'r') as f:
            helpers.write(destination, f.read(), mode=0o755)

    if not os.path.exists(logs_dir):
        helpers.makedirs(logs_dir, mode=0o755, user='******',
                         group='postgres')
        # Create the backups.log file used by the backup wrapper if it
        # does not exist, in order to avoid triggering spurious alerts
        # when a unit is first installed, per Bug #1329816.
        helpers.write(helpers.backups_log_path(), '', mode=0o644,
                      user='******', group='postgres')
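Both copies of install_administrative_scripts (this one and Example 7) funnel every file they create through helpers.write and helpers.makedirs. A minimal sketch of the semantics those calls appear to rely on (hypothetical; the real helpers ship with the charm and may differ):

import grp
import os
import pwd


def write(path, content, mode=0o640, user='root', group='root'):
    """Write content to path with the given ownership and permissions."""
    if isinstance(content, str):
        content = content.encode('UTF-8')
    uid = pwd.getpwnam(user).pw_uid
    gid = grp.getgrnam(group).gr_gid
    # Open with restrictive permissions first, then fix up owner and mode.
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        os.fchown(fd, uid, gid)
        os.fchmod(fd, mode)
        os.write(fd, content)
    finally:
        os.close(fd)


def makedirs(path, mode=0o755, user='root', group='root'):
    """Create path (and parents) if needed, then enforce ownership and mode."""
    if not os.path.isdir(path):
        os.makedirs(path, mode=mode)
    os.chown(path, pwd.getpwnam(user).pw_uid, grp.getgrnam(group).gr_gid)
    os.chmod(path, mode)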
Example 2
0
def update_pgpass():
    leader = context.Leader()
    accounts = ["root", "postgres", "ubuntu"]
    for account in accounts:
        path = os.path.expanduser(os.path.join("~{}".format(account), ".pgpass"))
        content = "# Managed by Juju\n" "*:*:*:{}:{}".format(
            replication.replication_username(), leader.get("replication_password")
        )
        helpers.write(path, content, mode=0o600, user=account, group=account)
Example 3
0
def update_pgpass():
    leader = context.Leader()
    accounts = ['root', 'postgres', 'ubuntu']
    for account in accounts:
        path = os.path.expanduser(os.path.join('~{}'.format(account),
                                               '.pgpass'))
        content = ('# Managed by Juju\n'
                   '*:*:*:{}:{}'.format(replication.replication_username(),
                                        leader.get('replication_password')))
        helpers.write(path, content, mode=0o600, user=account, group=account)
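For reference, libpq's .pgpass format is hostname:port:database:username:password, one rule per line, and libpq refuses to use the file unless it is private, which is why mode=0o600 above matters. The file rendered by either version would look like this (username and password values illustrative):

# Managed by Juju
*:*:*:replication:s3cret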
Example 4
0
def create_pg_ctl_conf():
    contents = textwrap.dedent(
        """\
        # Managed by Juju
        # Automatic pg_ctl configuration
        # This configuration file contains cluster specific options to be passed to
        # pg_ctl(1).

        pg_ctl_options = '-w -t 3600'
        """
    )
    helpers.write(
        postgresql.pg_ctl_conf_path(),
        contents,
        mode=0o644,
        user="******",
        group="postgres",
    )
    reactive.set_flag("postgresql.cluster.pg_ctl_conf.created")
Example 5
0
def write_metrics_cronjob():
    config = hookenv.config()
    path = os.path.join(helpers.cron_dir(), 'juju-postgresql-metrics')

    # Validated in preflight.block_on_invalid_config()
    metrics_target = config['metrics_target'].strip()
    metrics_sample_interval = config['metrics_sample_interval']

    reactive.remove_state('postgresql.metrics.needs_update')

    if not metrics_target:
        if os.path.exists(path):
            hookenv.log('Turning off metrics cronjob')
            os.unlink(path)
        return

    charm_dir = hookenv.charm_dir()
    statsd_host, statsd_port = metrics_target.split(':', 1)
    metrics_prefix = config['metrics_prefix'].strip()
    metrics_prefix = metrics_prefix.replace(
        "$UNIT", hookenv.local_unit().replace('.', '-').replace('/', '-'))

    # ensure script installed
    charm_script = os.path.join(charm_dir, 'files', 'metrics',
                                'postgres_to_statsd.py')
    script_path = os.path.join(helpers.scripts_dir(), 'postgres_to_statsd.py')
    with open(charm_script, 'r') as f:
        helpers.write(script_path, f.read(), mode=0o755)

    # write the crontab
    data = dict(interval=config['metrics_sample_interval'],
                script_path=script_path,
                metrics_prefix=metrics_prefix,
                metrics_sample_interval=metrics_sample_interval,
                statsd_host=statsd_host,
                statsd_port=statsd_port)
    templating.render('metrics_cronjob.template', path, data,
                      perms=0o644)
Example 6
0
def write_metrics_cronjob():
    config = hookenv.config()
    path = os.path.join(helpers.cron_dir(), "juju-postgresql-metrics")

    # Validated in preflight.block_on_invalid_config()
    metrics_target = config["metrics_target"].strip()
    metrics_sample_interval = config["metrics_sample_interval"]

    reactive.remove_state("postgresql.metrics.needs_update")

    if not metrics_target:
        if os.path.exists(path):
            hookenv.log("Turning off metrics cronjob")
            os.unlink(path)
        return

    charm_dir = hookenv.charm_dir()
    statsd_host, statsd_port = metrics_target.split(":", 1)
    metrics_prefix = config["metrics_prefix"].strip()
    metrics_prefix = metrics_prefix.replace("$UNIT", hookenv.local_unit().replace(".", "-").replace("/", "-"))

    # ensure script installed
    charm_script = os.path.join(charm_dir, "files", "metrics", "postgres_to_statsd.py")
    script_path = os.path.join(helpers.scripts_dir(), "postgres_to_statsd.py")
    with open(charm_script, "r") as f:
        helpers.write(script_path, f.read(), mode=0o755)

    # write the crontab
    data = dict(
        interval=config["metrics_sample_interval"],
        script_path=script_path,
        metrics_prefix=metrics_prefix,
        metrics_sample_interval=metrics_sample_interval,
        statsd_host=statsd_host,
        statsd_port=statsd_port,
    )
    templating.render("metrics_cronjob.template", charm_script, data, perms=0o644)
Example 7
0
def install_administrative_scripts():
    scripts_dir = helpers.scripts_dir()
    logs_dir = helpers.logs_dir()
    helpers.makedirs(scripts_dir, mode=0o755)

    # The database backup script. Most of this is redundant now.
    source = os.path.join(hookenv.charm_dir(), "scripts", "pgbackup.py")
    destination = os.path.join(scripts_dir, "dump-pg-db")
    with open(source, "r") as f:
        helpers.write(destination, f.read(), mode=0o755)

    backups_dir = helpers.backups_dir()
    helpers.makedirs(backups_dir, mode=0o750, user="******", group="postgres")

    # Generate a wrapper that invokes the backup script for each
    # database.
    data = dict(
        logs_dir=logs_dir,
        scripts_dir=scripts_dir,
        # backups_dir probably should be deprecated in favour of
        # a juju storage mount.
        backups_dir=backups_dir,
    )
    destination = os.path.join(helpers.scripts_dir(), "pg_backup_job")
    templating.render(
        "pg_backup_job.tmpl",
        destination,
        data,
        owner="root",
        group="postgres",
        perms=0o755,
    )

    # Install the reaper scripts.
    script = "pgkillidle.py"
    source = os.path.join(hookenv.charm_dir(), "scripts", script)
    destination = os.path.join(scripts_dir, script)
    if reactive.helpers.any_file_changed([source]) or not os.path.exists(destination):
        with open(source, "r") as f:
            helpers.write(destination, f.read(), mode=0o755)

    if not os.path.exists(logs_dir):
        helpers.makedirs(logs_dir, mode=0o755, user="******", group="postgres")
        # Create the backups.log file used by the backup wrapper if it
        # does not exist, in order to avoid triggering spurious alerts
        # when a unit is first installed, per Bug #1329816.
        helpers.write(
            helpers.backups_log_path(),
            "",
            mode=0o644,
            user="******",
            group="postgres",
        )

    reactive.set_state("postgresql.cluster.support-scripts")
Example 8
0
def update_wal_e_env_dir():
    '''Regenerate the envdir(1) environment used to drive WAL-E.

    We do this even if wal-e is not enabled to ensure we destroy
    any secrets potentially left around from when it was enabled.
    '''
    config = hookenv.config()
    env = dict(
        # wal-e Swift creds
        SWIFT_AUTHURL=config.get('os_auth_url', ''),
        SWIFT_TENANT=config.get('os_tenant_name', ''),
        SWIFT_USER=config.get('os_username', ''),
        SWIFT_PASSWORD=config.get('os_password', ''),

        # wal-e AWS creds
        AWS_ACCESS_KEY_ID=config.get('aws_access_key_id', ''),
        AWS_SECRET_ACCESS_KEY=config.get('aws_secret_access_key', ''),

        # wal-e Azure cred
        WABS_ACCOUNT_NAME=config.get('wabs_account_name', ''),
        WABS_ACCESS_KEY=config.get('wabs_access_key', ''),

        # OpenStack creds for swift(1) cli tool
        OS_AUTH_URL=config.get('os_auth_url', ''),
        OS_USERNAME=config.get('os_username', ''),
        OS_PASSWORD=config.get('os_password', ''),
        OS_TENANT_NAME=config.get('os_tenant_name', ''),

        WALE_SWIFT_PREFIX='',
        WALE_S3_PREFIX='',
        WALE_WABS_PREFIX='')

    uri = config.get('wal_e_storage_uri', None)
    if uri:
        required_env = []
        parsed_uri = urlparse(uri)
        if parsed_uri.scheme == 'swift':
            env['WALE_SWIFT_PREFIX'] = uri
            required_env = ['SWIFT_AUTHURL', 'SWIFT_TENANT',
                            'SWIFT_USER', 'SWIFT_PASSWORD']
        elif parsed_uri.scheme == 's3':
            env['WALE_S3_PREFIX'] = uri
            required_env = ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY']
        elif parsed_uri.scheme == 'wabs':
            env['WALE_WABS_PREFIX'] = uri
            required_env = ['WABS_ACCOUNT_NAME', 'WABS_ACCESS_KEY']
        else:
            hookenv.log('Invalid wal_e_storage_uri {}'.format(uri), ERROR)

        for env_key in required_env:
            if not env[env_key].strip():
                hookenv.log('Missing {}'.format(env_key), ERROR)

    # Regenerate the envdir(1) environment recommended by WAL-E.
    # All possible keys are rewritten to ensure we remove old secrets.
    helpers.makedirs(wal_e_env_dir(), mode=0o750,
                     user='******', group='postgres')
    for k, v in env.items():
        helpers.write(os.path.join(wal_e_env_dir(), k), v.strip(),
                      mode=0o640, user='******', group='postgres')

    reactive.set_state('postgresql.wal_e.configured')
Example 9
0
def update_wal_e_env_dir(dirpath, storage_uri):
    """Regenerate the envdir(1) environment used to drive WAL-E.

    We do this even if wal-e is not enabled to ensure we destroy
    any secrets potentially left around from when it was enabled.
    """
    config = hookenv.config()
    env = dict(
        # wal-e Swift creds
        SWIFT_AUTHURL=config.get("os_auth_url", ""),
        SWIFT_USER=config.get("os_username", ""),
        SWIFT_PASSWORD=config.get("os_password", ""),
        SWIFT_TENANT=config.get("os_tenant_name", ""),
        SWIFT_REGION=config.get("os_region_name", ""),
        SWIFT_AUTH_VERSION=config.get("os_identity_api_version", ""),
        SWIFT_USER_DOMAIN_NAME=config.get("os_user_domain_name", ""),
        SWIFT_PROJECT_NAME=config.get("os_project_name", ""),
        SWIFT_PROJECT_DOMAIN_NAME=config.get("os_project_domain_name", ""),
        # wal-e AWS creds
        AWS_ACCESS_KEY_ID=config.get("aws_access_key_id", ""),
        AWS_SECRET_ACCESS_KEY=config.get("aws_secret_access_key", ""),
        AWS_REGION=config.get("aws_region", ""),
        # wal-e Azure cred
        WABS_ACCOUNT_NAME=config.get("wabs_account_name", ""),
        WABS_ACCESS_KEY=config.get("wabs_access_key", ""),
        # OpenStack creds for swift(1) cli tool
        OS_AUTH_URL=config.get("os_auth_url", ""),
        OS_USERNAME=config.get("os_username", ""),
        OS_PASSWORD=config.get("os_password", ""),
        OS_TENANT_NAME=config.get("os_tenant_name", ""),
        OS_REGION_NAME=config.get("os_region_name", ""),
        OS_IDENTITY_API_VERSION=config.get("os_identity_api_version", ""),
        OS_USER_DOMAIN_NAME=config.get("os_user_domain_name", ""),
        OS_PROJECT_NAME=config.get("os_project_name", ""),
        OS_PROJECT_DOMAIN_NAME=config.get("os_project_domain_name", ""),
        WALE_SWIFT_PREFIX="",
        WALE_S3_PREFIX="",
        WALE_WABS_PREFIX="",
    )

    uri = storage_uri
    if uri:
        required_env = []
        parsed_uri = urlparse(uri)
        if parsed_uri.scheme == "swift":
            env["WALE_SWIFT_PREFIX"] = uri
            required_env = [
                "SWIFT_AUTHURL",
                "SWIFT_USER",
                "SWIFT_PASSWORD",
            ]
        elif parsed_uri.scheme == "s3":
            env["WALE_S3_PREFIX"] = uri
            required_env = ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION"]
        elif parsed_uri.scheme == "wabs":
            env["WALE_WABS_PREFIX"] = uri
            required_env = ["WABS_ACCOUNT_NAME", "WABS_ACCESS_KEY"]
        else:
            hookenv.log("Invalid wal_e_storage_uri {}".format(uri), ERROR)

        for env_key in required_env:
            if not env[env_key].strip():
                hookenv.log("Missing {}".format(env_key), ERROR)

    # Regenerate the envdir(1) environment recommended by WAL-E.
    # All possible keys are rewritten to ensure we remove old secrets.
    helpers.makedirs(dirpath, mode=0o750, user="******", group="postgres")
    for k, v in env.items():
        helpers.write(
            os.path.join(dirpath, k),
            v.strip(),
            mode=0o640,
            user="******",
            group="postgres",
        )
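WAL-E picks these settings up via envdir(1) from daemontools: each file in the directory becomes one environment variable, named after the file and holding its contents. A minimal Python sketch of the consuming side, assuming a wal-e binary on PATH and a hypothetical directory path:

import os
import subprocess


def run_with_envdir(dirpath, command):
    """Run command with the envdir contents merged into the environment,
    approximating `envdir dirpath command ...` from daemontools."""
    env = dict(os.environ)
    for name in os.listdir(dirpath):
        with open(os.path.join(dirpath, name)) as f:
            env[name] = f.read().strip()
    return subprocess.check_output(command, env=env, universal_newlines=True)


# e.g. run_with_envdir('/etc/wal-e.env', ['wal-e', 'backup-list'])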
Example 10
0
def wal_e_restore():
    reactive.remove_state("action.wal-e-restore")
    params = hookenv.action_get()
    backup = params["backup-name"].strip().replace("-", "_")
    storage_uri = params["storage-uri"].strip()

    ship_uri = hookenv.config().get("wal_e_storage_uri")
    if storage_uri == ship_uri:
        hookenv.action_fail(
            "The storage-uri parameter is identical to "
            "the wal_e_storage_uri config setting. To avoid "
            "corrupting the backups, your restoration source "
            "cannot be the same as the folder you are "
            "archiving to."
        )
        return

    if not params["confirm"]:
        m = "Recovery from {}.".format(storage_uri)
        if ship_uri:
            m += "\nContents of {} will be destroyed.".format(ship_uri)
        m += "\nExisting local database will be destroyed."
        m += "\nRerun action with 'confirm=true' to proceed."
        hookenv.action_set({"info": m})
        return

    with tempfile.TemporaryDirectory(prefix="wal-e", suffix="envdir") as envdir:
        update_wal_e_env_dir(envdir, storage_uri)

        # Confirm there is a backup to restore
        backups = wal_e_list_backups(envdir)
        if not backups:
            hookenv.action_fail("No backups found at {}".format(storage_uri))
            return
        if backup != "LATEST" and backup not in (b["name"] for b in backups):
            hookenv.action_fail("Backup {} not found".format(backup))
            return

        # Shutdown PostgreSQL. Note we want this action to run synchronously,
        # so there is no opportunity to ask permission from the leader. If
        # there are other units cloning this database, those clone operations
        # will fail. Which seems preferable to blocking a recovery operation
        # in any case, because if we are doing disaster recovery we generally
        # want to do it right now.
        status_set("maintenance", "Stopping PostgreSQL for backup restoration")
        postgresql.stop()

        # Trash the existing database. It's dangerous to do this first, but
        # we probably need the space.
        data_dir = postgresql.data_dir()  # May be a symlink
        for content in os.listdir(data_dir):
            cpath = os.path.join(data_dir, content)
            if os.path.isdir(cpath) and not os.path.islink(cpath):
                shutil.rmtree(cpath)
            else:
                os.remove(cpath)

        # WAL-E recover
        status_set("maintenance", "Restoring backup {}".format(backup))
        wal_e_run(["backup-fetch", data_dir, backup], envdir=envdir)

        # Create recovery.conf to complete recovery
        is_master = reactive.is_state("postgresql.replication.is_master")
        standby_mode = "off" if is_master else "on"
        if params.get("target-time"):
            target_time = "recovery_target_time='{}'" "".format(params["target-time"])
        else:
            target_time = ""
        target_action = "promote" if is_master else "shutdown"
        immediate = "" if is_master else "recovery_target='immediate'"
        helpers.write(
            postgresql.recovery_conf_path(),
            dedent(
                """\
                # Managed by Juju. PITR in progress.
                standby_mode = {}
                restore_command='{}'
                recovery_target_timeline = {}
                recovery_target_action = {}
                {}
                {}
                """
            ).format(
                standby_mode,
                wal_e_restore_command(envdir=envdir),
                params["target-timeline"],
                target_action,
                target_time,
                immediate,
            ),
            mode=0o600,
            user="******",
            group="postgres",
        )

        # Avoid circular import. We could also avoid the import entirely
        # with a sufficiently complex set of handlers in the replication
        # module, but that seems to be a worse solution. Better to break
        # out this action into a separate module.
        from reactive.postgresql import replication

        if is_master:
            if ship_uri:
                # If master, trash the configured wal-e storage. This may
                # contain WAL and backups from the old cluster which will
                # conflict with the new cluster. Hopefully it does not
                # contain anything important, because we have no way to
                # prompt the user for confirmation.
                wal_e_run(["delete", "--confirm", "everything"])

            # Then, wait for recovery and promotion.
            postgresql.start()
            con = postgresql.connect()
            cur = con.cursor()
            while True:
                if postgresql.has_version("10"):
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_wal_replay_lsn()"""
                    )
                else:
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_xlog_replay_location()"""
                    )
                in_rec, loc = cur.fetchone()
                if not in_rec:
                    break
                status_set("maintenance", "Recovery at {}".format(loc))
                time.sleep(10)
        else:
            # If standby, startup and wait for recovery to complete and
            # shutdown.
            status_set("maintenance", "Recovery")
            # Startup might shutdown immediately and look like a failure.
            postgresql.start(ignore_failure=True)
            # No recovery point status yet for standbys, as we would need
            # to handle connection failures when the DB shuts down. We
            # should do this.
            while postgresql.is_running():
                time.sleep(5)
            replication.update_recovery_conf(follow=replication.get_master())

    # Reactive handlers will deal with the rest of the cleanup.
    # eg. ensuring required users and roles exist
    replication.update_replication_states()
    reactive.remove_state("postgresql.cluster.configured")
    reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
    reactive.remove_state("postgresql.nagios.user_ensured")
    reactive.remove_state("postgresql.replication.replication_user_created")
    reactive.remove_state("postgresql.client.published")
Example 11
0
def update_nagios_pgpass():
    leader = context.Leader()
    nagios_password = leader["nagios_password"]
    content = "*:*:*:{}:{}".format(nagios_username(), nagios_password)
    helpers.write(nagios_pgpass_path(), content, mode=0o600, user="******", group="nagios")
Example 12
0
def update_nrpe_config():
    update_nagios_pgpass()
    nrpe = NRPE()

    user = nagios_username()
    port = postgresql.port()
    nrpe.add_check(
        shortname="pgsql",
        description="Check pgsql",
        check_cmd="check_pgsql -P {} -l {}".format(port, user),
    )

    # copy the check script which will run cronned as postgres user
    with open("scripts/find_latest_ready_wal.py") as fh:
        check_script = fh.read()

    check_script_path = "{}/{}".format(helpers.scripts_dir(), "find_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # seed the check output file for the above (with "0\n", i.e. zero age)
    # and appropriate permissions
    check_output_path = "/var/lib/nagios/postgres-wal-max-age.txt"
    if not os.path.exists(check_output_path):
        helpers.write(check_output_path, b"0\n", mode=0o644, user="******", group="postgres")

    # retrieve the threshold values from the charm config
    config = hookenv.config()
    check_warn_threshold = config["wal_archive_warn_threshold"] or 0
    check_crit_threshold = config["wal_archive_crit_threshold"] or 0

    check_cron_path = "/etc/cron.d/postgres-wal-archive-check"
    if check_warn_threshold and check_crit_threshold:
        # create the cron job to run the above
        check_cron = "*/2 * * * * postgres {}".format(check_script_path)
        helpers.write(check_cron_path, check_cron, mode=0o644)

    # copy the nagios plugin which will check the cronned output
    with open("scripts/check_latest_ready_wal.py") as fh:
        check_script = fh.read()
    check_script_path = "{}/{}".format("/usr/local/lib/nagios/plugins", "check_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # write the nagios check definition
    nrpe.add_check(
        shortname="pgsql_stale_wal",
        description="Check for stale WAL backups",
        check_cmd="{} {} {}".format(check_script_path, check_warn_threshold, check_crit_threshold),
    )

    if reactive.is_state("postgresql.replication.is_master"):
        # TODO: These should be calculated from the backup schedule,
        # which is difficult since that is specified in crontab format.
        warn_age = 172800
        crit_age = 194400
        backups_log = helpers.backups_log_path()
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=("check_file_age -w {} -c {} -f {}" "".format(warn_age, crit_age, backups_log)),
        )
    else:
        # Standbys don't do backups. We still generate a check though,
        # to ensure alerts get through to monitoring after a failover.
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=r"check_dummy 0 standby_does_not_backup",
        )
    nrpe.write()
    reactive.remove_state("postgresql.nagios.needs_update")