Exemple #1
0
def create_cluster():
    '''Create the PostgreSQL cluster and set postgresql.cluster.created.

    Asserts beforehand that neither postgresql.conf nor the data
    directory exists on disk.
    '''
    conf_path = postgresql.postgresql_conf_path()
    assert not os.path.exists(conf_path), \
        'inhibit_default_cluster_creation() failed'

    pgdata = postgresql.data_dir()
    assert not os.path.exists(pgdata)

    postgresql.create_cluster()
    reactive.set_state('postgresql.cluster.created')
def remount():
    '''Move PGDATA onto the external volume and symlink the old path to it.

    Stops PostgreSQL first if the 'postgresql.cluster.is_running' state
    is set. If the target directory already exists it is reused as-is;
    otherwise it is created and populated from the current data
    directory with rsync. On success the old data directory is renamed
    to a timestamped backup, replaced by a symlink to the new location,
    and the 'postgresql.storage.needs_remount' state is removed.

    If rsync fails, the workload status is set to blocked and the
    partially populated target directory is removed, so that a later
    run does not mistake it for an existing database.
    '''
    # Local import: only needed for the failure cleanup below.
    import shutil

    if reactive.is_state('postgresql.cluster.is_running'):
        # Attempting this while PostgreSQL is live would be really, really bad.
        service.stop()

    old_data_dir = postgresql.data_dir()
    new_data_dir = os.path.join(external_volume_mount, 'postgresql',
                                postgresql.version(), 'main')
    # Timestamp suffix keeps repeated migrations from clobbering backups.
    backup_data_dir = '{}-{}'.format(old_data_dir, int(time.time()))

    if os.path.isdir(new_data_dir):
        hookenv.log('Remounting existing database at {}'.format(new_data_dir),
                    WARNING)
    else:
        status_set('maintenance',
                   'Migrating data from {} to {}'.format(old_data_dir,
                                                         new_data_dir))
        # NOTE(review): '******' looks like a redacted value, presumably
        # the postgres system user -- confirm against the real source.
        helpers.makedirs(new_data_dir, mode=0o770,
                         user='******', group='postgres')
        try:
            # Trailing slashes make rsync copy directory contents.
            rsync_cmd = ['rsync', '-av',
                         old_data_dir + '/',
                         new_data_dir + '/']
            hookenv.log('Running {}'.format(' '.join(rsync_cmd)), DEBUG)
            subprocess.check_call(rsync_cmd)
        except subprocess.CalledProcessError:
            # We created new_data_dir above, so it only holds our
            # incomplete copy. Leaving it behind would make the next run
            # take the 'existing database' branch and switch PGDATA to
            # partial data.
            hookenv.log('Removing incomplete copy at {}'.format(new_data_dir),
                        WARNING)
            shutil.rmtree(new_data_dir, ignore_errors=True)
            status_set('blocked',
                       'Failed to sync data from {} to {}'
                       ''.format(old_data_dir, new_data_dir))
            return

    os.replace(old_data_dir, backup_data_dir)
    os.symlink(new_data_dir, old_data_dir)
    fix_perms(new_data_dir)
    reactive.remove_state('postgresql.storage.needs_remount')
 def test_simple_paths(self, version):
     # We have a pile of trivial helpers to get directory and file
     # paths. We use these for consistency and ease of mocking.
     # Each helper is called and checked in turn against the path
     # expected for the pinned version.
     version.return_value = "9.9"
     expectations = (
         (postgresql.config_dir, "/etc/postgresql/9.9/main"),
         (postgresql.data_dir, "/var/lib/postgresql/9.9/main"),
         (postgresql.postgresql_conf_path, "/etc/postgresql/9.9/main/postgresql.conf"),
         (postgresql.pg_hba_conf_path, "/etc/postgresql/9.9/main/pg_hba.conf"),
         (postgresql.pg_ident_conf_path, "/etc/postgresql/9.9/main/pg_ident.conf"),
         (postgresql.recovery_conf_path, "/var/lib/postgresql/9.9/main/recovery.conf"),
         (postgresql.pg_ctl_path, "/usr/lib/postgresql/9.9/bin/pg_ctl"),
         (postgresql.postgres_path, "/usr/lib/postgresql/9.9/bin/postgres"),
     )
     for path_helper, expected_path in expectations:
         self.assertEqual(path_helper(), expected_path)
Exemple #4
0
 def test_simple_paths(self, version):
     # The path helpers are trivial, but everything else relies on
     # them (and mocks them), so pin their output for a fixed version.
     check = self.assertEqual
     version.return_value = '9.9'

     # Directories.
     check(postgresql.config_dir(), '/etc/postgresql/9.9/main')
     check(postgresql.data_dir(), '/var/lib/postgresql/9.9/main')

     # Configuration files.
     check(postgresql.postgresql_conf_path(), '/etc/postgresql/9.9/main/postgresql.conf')
     check(postgresql.pg_hba_conf_path(), '/etc/postgresql/9.9/main/pg_hba.conf')
     check(postgresql.pg_ident_conf_path(), '/etc/postgresql/9.9/main/pg_ident.conf')
     check(postgresql.recovery_conf_path(), '/var/lib/postgresql/9.9/main/recovery.conf')

     # Binaries.
     check(postgresql.pg_ctl_path(), '/usr/lib/postgresql/9.9/bin/pg_ctl')
     check(postgresql.postgres_path(), '/usr/lib/postgresql/9.9/bin/postgres')
Exemple #5
0
def migrate_pgdata():
    '''
    Copy the data from /var/lib/postgresql/9.x/main to the
    new path and replace the original PGDATA with a symlink.
    Note that the original may already be a symlink, either from
    the block storage broker or manual changes by admins.

    The new path is read from unitdata under pgdata_path_key.
    PostgreSQL is stopped first if the 'postgresql.cluster.is_running'
    state is set. On success the old data directory is renamed to a
    timestamped backup and 'postgresql.storage.pgdata.migrated' is set.

    If rsync fails, the workload status is set to blocked and the
    partially populated target directory is removed, so that a later
    run does not mistake it for an existing database.
    '''
    # Local import: only needed for the failure cleanup below.
    import shutil

    if reactive.is_state('postgresql.cluster.is_running'):
        # Attempting this while PostgreSQL is live would be really, really bad.
        service.stop()

    old_data_dir = postgresql.data_dir()
    new_data_dir = unitdata.kv().get(pgdata_path_key)

    # Timestamp suffix keeps repeated migrations from clobbering backups.
    backup_data_dir = '{}-{}'.format(old_data_dir, int(time.time()))

    if os.path.isdir(new_data_dir):
        # This never happens with Juju storage, at least with 2.0,
        # because we have no way of reusing old partitions.
        hookenv.log('Remounting existing database at {}'.format(new_data_dir),
                    WARNING)
    else:
        status_set('maintenance',
                   'Migrating data from {} to {}'.format(old_data_dir,
                                                         new_data_dir))
        # NOTE(review): '******' looks like a redacted value, presumably
        # the postgres system user -- confirm against the real source.
        helpers.makedirs(new_data_dir, mode=0o770,
                         user='******', group='postgres')
        try:
            # Trailing slashes make rsync copy directory contents.
            rsync_cmd = ['rsync', '-av',
                         old_data_dir + '/',
                         new_data_dir + '/']
            hookenv.log('Running {}'.format(' '.join(rsync_cmd)), DEBUG)
            subprocess.check_call(rsync_cmd, universal_newlines=True)
        except subprocess.CalledProcessError:
            # We created new_data_dir above, so it only holds our
            # incomplete copy. Leaving it behind would make the next run
            # take the 'existing database' branch and switch PGDATA to
            # partial data.
            hookenv.log('Removing incomplete copy at {}'.format(new_data_dir),
                        WARNING)
            shutil.rmtree(new_data_dir, ignore_errors=True)
            status_set('blocked',
                       'Failed to sync data from {} to {}'
                       ''.format(old_data_dir, new_data_dir))
            return

    os.replace(old_data_dir, backup_data_dir)
    os.symlink(new_data_dir, old_data_dir)
    fix_perms(new_data_dir)
    reactive.set_state('postgresql.storage.pgdata.migrated')
Exemple #6
0
def main():
    """Refresh cluster-related reactive states before handlers run.

    Three steps:

    1. Unless the cluster is already created or the initial check has
       already passed, look for a leftover postgresql.conf or data
       directory. If either exists the unit is set to blocked; otherwise
       the 'postgresql.cluster.initial-check' state is set.
    2. If the cluster has been created, re-derive
       'postgresql.cluster.is_running' from the live service, reporting
       a blocked status if the check itself fails.
    3. Remove 'postgresql.cluster.configured' so the configuration is
       regenerated.
    """
    if not (reactive.is_state("postgresql.cluster.created") or reactive.is_state("postgresql.cluster.initial-check")):
        # We need to check for existence of an existing database,
        # before the main PostgreSQL package has been installed.
        # If there is one, abort rather than risk destroying data.
        # We need to do this here, as the apt layer may pull in
        # the main PostgreSQL package through dependencies, per
        # lp:1749284
        if os.path.exists(postgresql.postgresql_conf_path()):
            hookenv.status_set(
                "blocked",
                "PostgreSQL config from previous install found at {}".format(postgresql.postgresql_conf_path()),
            )
        elif os.path.exists(postgresql.data_dir()):
            # Use the same helper as the check above; the previous
            # 'postgresql.postgresql.data_dir()' spelling did not match
            # any other call in this function.
            hookenv.status_set(
                "blocked",
                "PostgreSQL database from previous install found at {}".format(postgresql.data_dir()),
            )
        else:
            hookenv.log("No pre-existing PostgreSQL database found")
            reactive.set_state("postgresql.cluster.initial-check")

    # Don't trust this state from the last hook. Daemons may have
    # crashed and servers rebooted since then.
    if reactive.is_state("postgresql.cluster.created"):
        try:
            reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
        except subprocess.CalledProcessError as x:
            # Don't overwrite a blocked status that is already set.
            if not reactive.is_state("workloadstatus.blocked"):
                status_set(
                    "blocked",
                    "Local PostgreSQL cluster is corrupt: {}".format(x.stderr),
                )

    # Reconfigure PostgreSQL. While we don't strictly speaking need
    # to do this every hook, we do need to do this almost every hook,
    # since even things like the number of peers or number of clients
    # can affect minimum viable configuration settings.
    reactive.remove_state("postgresql.cluster.configured")

    log_states()  # Debug noise.
def remount():
    """Move PGDATA onto the external volume and symlink the old path to it.

    Stops PostgreSQL first if the "postgresql.cluster.is_running" state
    is set. If the target directory already exists it is reused as-is;
    otherwise it is created and populated from the current data
    directory with rsync. On success the old data directory is renamed
    to a timestamped backup, replaced by a symlink to the new location,
    and the "postgresql.storage.needs_remount" state is removed.

    If rsync fails, the workload status is set to blocked and the
    partially populated target directory is removed, so that a later
    run does not mistake it for an existing database.
    """
    # Local import: only needed for the failure cleanup below.
    import shutil

    if reactive.is_state("postgresql.cluster.is_running"):
        # Attempting this while PostgreSQL is live would be really, really bad.
        service.stop()

    old_data_dir = postgresql.data_dir()
    new_data_dir = os.path.join(external_volume_mount, "postgresql",
                                postgresql.version(), "main")
    # Timestamp suffix keeps repeated migrations from clobbering backups.
    backup_data_dir = "{}-{}".format(old_data_dir, int(time.time()))

    if os.path.isdir(new_data_dir):
        hookenv.log("Remounting existing database at {}".format(new_data_dir),
                    WARNING)
    else:
        status_set(
            "maintenance",
            "Migrating data from {} to {}".format(old_data_dir, new_data_dir),
        )
        # NOTE(review): "******" looks like a redacted value, presumably
        # the postgres system user -- confirm against the real source.
        helpers.makedirs(new_data_dir,
                         mode=0o770,
                         user="******",
                         group="postgres")
        try:
            # Trailing slashes make rsync copy directory contents.
            rsync_cmd = [
                "rsync", "-av", old_data_dir + "/", new_data_dir + "/"
            ]
            hookenv.log("Running {}".format(" ".join(rsync_cmd)), DEBUG)
            subprocess.check_call(rsync_cmd)
        except subprocess.CalledProcessError:
            # We created new_data_dir above, so it only holds our
            # incomplete copy. Leaving it behind would make the next run
            # take the "existing database" branch and switch PGDATA to
            # partial data.
            hookenv.log("Removing incomplete copy at {}".format(new_data_dir),
                        WARNING)
            shutil.rmtree(new_data_dir, ignore_errors=True)
            status_set(
                "blocked",
                "Failed to sync data from {} to {}"
                "".format(old_data_dir, new_data_dir),
            )
            return

    os.replace(old_data_dir, backup_data_dir)
    os.symlink(new_data_dir, old_data_dir)
    fix_perms(new_data_dir)
    reactive.remove_state("postgresql.storage.needs_remount")
Exemple #8
0
def attach():
    '''Record the newly attached PGDATA storage and check that it fits.

    Stores the storage location and the derived PGDATA path in
    unitdata. If no database already exists at that path and the space
    used on the current data directory's filesystem exceeds the free
    space on the new storage, the unit is set to blocked. Otherwise
    rsync is queued for install, the 'restart' lock is requested and
    'postgresql.storage.pgdata.attached' is set.
    '''
    location = hookenv.storage_get()['location']
    pgdata_path = os.path.join(location, postgresql.version(), 'main')
    unitdata.kv().set(pgdata_mount_key, location)
    unitdata.kv().set(pgdata_path_key, pgdata_path)

    hookenv.log('PGDATA storage attached at {}'.format(location))

    # Never happens with Juju 2.0 as we can't reuse an old mount. This
    # check is here for the future.
    has_existing_db = os.path.exists(pgdata_path)

    # disk_usage() reports figures for the whole filesystem containing
    # the given path.
    space_needed = shutil.disk_usage(postgresql.data_dir()).used
    space_available = shutil.disk_usage(location).free

    if not has_existing_db and space_needed > space_available:
        hookenv.status_set('blocked',
                           'Not enough free space in pgdata storage')
        return

    apt.queue_install(['rsync'])
    coordinator.acquire('restart')
    reactive.set_state('postgresql.storage.pgdata.attached')
def needs_remount():
    '''Tell whether PGDATA still has to be moved to the external volume.

    True when the external volume mount point exists as a directory
    and the PostgreSQL data directory is not a symlink.
    '''
    volume_present = os.path.isdir(external_volume_mount)
    already_linked = os.path.islink(postgresql.data_dir())
    if not volume_present:
        return False
    return not already_linked
Exemple #10
0
def wal_e_backup_command():
    '''Return the command string that pushes a WAL-E backup of PGDATA.

    The command runs wal-e under envdir, using the WAL-E environment
    directory and the current PostgreSQL data directory.
    '''
    env_dir = wal_e_env_dir()
    pgdata = postgresql.data_dir()
    return 'envdir {} wal-e backup-push {}'.format(env_dir, pgdata)
Exemple #11
0
def wal_e_restore():
    """Restore the local database from a WAL-E backup (action handler).

    Action parameters read: "backup-name", "storage-uri", "confirm",
    "target-time" (optional) and "target-timeline".

    Flow:

    1. Fail the action if storage-uri equals the wal_e_storage_uri
       config setting.
    2. Without confirm, only report what would be destroyed and return.
    3. Build a temporary WAL-E envdir for storage-uri and fail the
       action if no backups exist there, or if the named backup (other
       than "LATEST") is not among them.
    4. Stop PostgreSQL, delete the contents of the data directory,
       fetch the backup into it and write recovery.conf.
    5. On the master: wipe the configured WAL-E storage (if any), start
       PostgreSQL and poll until it leaves recovery. On a standby:
       start PostgreSQL, wait for it to stop running, then rewrite
       recovery.conf to follow the master.
    6. Reset the reactive states so the remaining handlers redo
       configuration, user creation and client publication.
    """
    reactive.remove_state("action.wal-e-restore")
    params = hookenv.action_get()
    # Hyphens in the requested name are converted to underscores before
    # matching against the backup list.
    # NOTE(review): presumably WAL-E backup names use underscores -- confirm.
    backup = params["backup-name"].strip().replace("-", "_")
    storage_uri = params["storage-uri"].strip()

    # Refuse to restore from the same location we archive to.
    ship_uri = hookenv.config().get("wal_e_storage_uri")
    if storage_uri == ship_uri:
        hookenv.action_fail(
            "The storage-uri parameter is identical to "
            "the wal_e_storage_uri config setting. Your "
            "restoration source cannot be the same as the "
            "folder you are archiving too to avoid corrupting "
            "the backups."
        )
        return

    # Dry run: describe the consequences and ask for confirmation.
    if not params["confirm"]:
        m = "Recovery from {}.".format(storage_uri)
        if ship_uri:
            m += "\nContents of {} will be destroyed.".format(ship_uri)
        m += "\nExisting local database will be destroyed."
        m += "\nRerun action with 'confirm=true' to proceed."
        hookenv.action_set({"info": m})
        return

    # The envdir only lives for the duration of this block; the
    # recovery.conf written below references it via the restore command.
    with tempfile.TemporaryDirectory(prefix="wal-e", suffix="envdir") as envdir:
        update_wal_e_env_dir(envdir, storage_uri)

        # Confirm there is a backup to restore
        backups = wal_e_list_backups(envdir)
        if not backups:
            hookenv.action_fail("No backups found at {}".format(storage_uri))
            return
        if backup != "LATEST" and backup not in (b["name"] for b in backups):
            hookenv.action_fail("Backup {} not found".format(backup))
            return

        # Shutdown PostgreSQL. Note we want this action to run synchronously,
        # so there is no opportunity to ask permission from the leader. If
        # there are other units cloning this database, those clone operations
        # will fail. Which seems preferable to blocking a recovery operation
        # in any case, because if we are doing disaster recovery we generally
        # want to do it right now.
        status_set("maintenance", "Stopping PostgreSQL for backup restoration")
        postgresql.stop()

        # Trash the existing database. Its dangerous to do this first, but
        # we probably need the space.
        # Only the directory's contents are removed, so data_dir itself
        # (and any symlink it is reached through) stays in place.
        data_dir = postgresql.data_dir()  # May be a symlink
        for content in os.listdir(data_dir):
            cpath = os.path.join(data_dir, content)
            if os.path.isdir(cpath) and not os.path.islink(cpath):
                shutil.rmtree(cpath)
            else:
                os.remove(cpath)

        # WAL-E recover
        status_set("maintenance", "Restoring backup {}".format(backup))
        wal_e_run(["backup-fetch", data_dir, backup], envdir=envdir)

        # Create recovery.conf to complete recovery
        # The master promotes itself once recovery ends; a standby stops
        # at the first consistent point and shuts down.
        is_master = reactive.is_state("postgresql.replication.is_master")
        standby_mode = "off" if is_master else "on"
        if params.get("target-time"):
            target_time = "recovery_target_time='{}'" "".format(params["target-time"])
        else:
            target_time = ""
        target_action = "promote" if is_master else "shutdown"
        immediate = "" if is_master else "recovery_target='immediate'"
        helpers.write(
            postgresql.recovery_conf_path(),
            dedent(
                """\
                             # Managed by Juju. PITR in progress.
                             standby_mode = {}
                             restore_command='{}'
                             recovery_target_timeline = {}
                             recovery_target_action = {}
                             {}
                             {}
                             """
            ).format(
                standby_mode,
                wal_e_restore_command(envdir=envdir),
                params["target-timeline"],
                target_action,
                target_time,
                immediate,
            ),
            mode=0o600,
            user="******",
            group="postgres",
        )

        # Avoid circular import. We could also avoid the import entirely
        # with a sufficiently complex set of handlers in the replication
        # module, but that seems to be a worse solution. Better to break
        # out this action into a separate module.
        # The name is function-scoped, so it is still usable after the
        # 'with' block ends.
        from reactive.postgresql import replication

        if is_master:
            if ship_uri:
                # If master, trash the configured wal-e storage. This may
                # contain WAL and backups from the old cluster which will
                # conflict with the new cluster. Hopefully it does not
                # contain anything important, because we have no way to
                # prompt the user for confirmation.
                wal_e_run(["delete", "--confirm", "everything"])

            # Then, wait for recovery and promotion.
            # NOTE(review): con and cur are not explicitly closed here.
            postgresql.start()
            con = postgresql.connect()
            cur = con.cursor()
            while True:
                # The replay-position function has a different name
                # depending on the has_version("10") check.
                if postgresql.has_version("10"):
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_wal_replay_lsn()"""
                    )
                else:
                    cur.execute(
                        """SELECT pg_is_in_recovery(),
                                          pg_last_xlog_replay_location()"""
                    )
                in_rec, loc = cur.fetchone()
                if not in_rec:
                    break
                status_set("maintenance", "Recovery at {}".format(loc))
                time.sleep(10)
        else:
            # If standby, startup and wait for recovery to complete and
            # shutdown.
            status_set("maintenance", "Recovery")
            # Startup might shutdown immediately and look like a failure.
            postgresql.start(ignore_failure=True)
            # No recovery point status yet for standbys, as we would need
            # to handle connection failures when the DB shuts down. We
            # should do this.
            while postgresql.is_running():
                time.sleep(5)
            replication.update_recovery_conf(follow=replication.get_master())

    # Reactive handlers will deal with the rest of the cleanup.
    # eg. ensuring required users and roles exist
    replication.update_replication_states()
    reactive.remove_state("postgresql.cluster.configured")
    reactive.toggle_state("postgresql.cluster.is_running", postgresql.is_running())
    reactive.remove_state("postgresql.nagios.user_ensured")
    reactive.remove_state("postgresql.replication.replication_user_created")
    reactive.remove_state("postgresql.client.published")
Exemple #12
0
def wal_e_backup_command():
    """Return the command string that pushes a WAL-E backup of PGDATA.

    Uses the snap-installed envdir wrapper and wal-e binary, with the
    WAL-E environment directory and the current data directory.
    """
    env_dir = wal_e_env_dir()
    pgdata = postgresql.data_dir()
    template = "/snap/bin/wal-e.envdir {} /snap/bin/wal-e backup-push {}"
    return template.format(env_dir, pgdata)
def clone_master():
    """Replace the local cluster with a pg_basebackup copy of the master.

    Asserts that this unit is neither the master nor running, empties
    the real data directory, then streams a base backup from the
    master's host and port as the _juju_repl user. On success the
    recovery configuration is pointed at the master and
    "postgresql.replication.cloned" is set. On failure the local cluster
    is dropped, its created/configured states are removed and the hook
    exits via SystemExit(0).
    """
    master_unit = get_master()
    master_relinfo = helpers.get_peer_relation()[master_unit]

    # Be paranoid since we are about to destroy data.
    assert not reactive.helpers.is_state("postgresql.replication.is_master")
    assert not reactive.helpers.is_state("postgresql.cluster.is_running")

    # data_dir may have been replaced by a symbolic link. Resolve it so
    # that we empty and recreate the real directory while any links
    # stay in place.
    real_data_dir = os.path.realpath(postgresql.data_dir())

    if os.path.exists(real_data_dir):
        hookenv.log("Removing {} in preparation for clone".format(real_data_dir))
        shutil.rmtree(real_data_dir)
    # NOTE(review): "******" looks like a redacted value, presumably
    # the postgres system user -- confirm against the real source.
    helpers.makedirs(real_data_dir, mode=0o700, user="******", group="postgres")

    # The streaming option is spelled differently depending on the
    # has_version("10") check.
    wal_method = "--wal-method=stream" if postgresql.has_version("10") else "--xlog-method=stream"

    # -H needed to locate $HOME/.pgpass
    basebackup_cmd = ["sudo", "-H", "-u", "postgres"]
    basebackup_cmd += ["pg_basebackup", "-D", postgresql.data_dir()]
    basebackup_cmd += ["-h", master_relinfo["host"], "-p", master_relinfo["port"]]
    basebackup_cmd += [
        "--checkpoint=fast",
        "--progress",
        wal_method,
        "--no-password",
        "--username=_juju_repl",
    ]

    hookenv.log("Cloning {} with {}".format(master_unit, " ".join(basebackup_cmd)))
    status_set("maintenance", "Cloning {}".format(master_unit))
    try:
        # Switch to a directory the postgres user can access.
        with helpers.switch_cwd("/tmp"):
            subprocess.check_call(basebackup_cmd, universal_newlines=True)
    except subprocess.CalledProcessError as clone_error:
        hookenv.log("Clone failed with {}".format(clone_error), ERROR)
        # We failed, and the local cluster is broken.
        status_set("blocked", "Failed to clone {}".format(master_unit))
        postgresql.drop_cluster()
        for stale_state in ("postgresql.cluster.configured", "postgresql.cluster.created"):
            reactive.remove_state(stale_state)
        # Terminate. We need this hook to exit, rather than enter a loop.
        raise SystemExit(0)

    update_recovery_conf(follow=master_unit)

    reactive.set_state("postgresql.replication.cloned")
    update_replication_states()
Exemple #14
0
def needs_remount():
    '''Report whether the data directory must be moved to the volume.

    Both checks are always evaluated; the result is True only if the
    external volume mount point is a directory and the PostgreSQL data
    directory is not a symlink.
    '''
    volume_dir_exists = os.path.isdir(external_volume_mount)
    pgdata_is_symlink = os.path.islink(postgresql.data_dir())
    return False if not volume_dir_exists else not pgdata_is_symlink