def set_config_defaults(config, *, check_commands=True):
    """Fill in default values for all optional configuration keys, validate
    per-site object storage settings and resolve PostgreSQL command paths.

    Mutates ``config`` in place and returns the same dict.

    :param config: top-level pghoard configuration dict
    :param check_commands: when True, run ``--version`` on the resolved
        pg_basebackup/pg_receivexlog binaries for every active site
    :raises InvalidConfigurationError: on invalid object storage settings,
        missing pg_data_directory for ``local-tar`` mode, or missing commands
    """
    # TODO: consider implementing a real configuration schema at some point
    # misc global defaults
    config.setdefault("backup_location", None)
    config.setdefault("http_address", PGHOARD_HOST)
    config.setdefault("http_port", PGHOARD_PORT)
    # NOTE: backup_location must be defaulted before this line reads it
    config.setdefault("alert_file_dir", config.get("backup_location") or os.getcwd())
    config.setdefault("json_state_file_path", "/var/lib/pghoard/pghoard_state.json")
    config.setdefault("maintenance_mode_file", "/var/lib/pghoard/maintenance_mode_file")
    config.setdefault("log_level", "INFO")
    config.setdefault("path_prefix", "")
    config.setdefault("upload_retries_warning_limit", 3)

    # default to 5 compression and transfer threads
    config.setdefault("compression", {}).setdefault("thread_count", max(get_cpu_count(), 5))
    config.setdefault("transfer", {}).setdefault("thread_count", max(get_cpu_count(), 5))
    # default to prefetching min(#compressors, #transferagents) - 1 objects so all
    # operations where prefetching is used run fully in parallel without waiting to start
    config.setdefault(
        "restore_prefetch",
        min(config["compression"]["thread_count"], config["transfer"]["thread_count"]) - 1)
    # if compression algorithm is not explicitly set prefer snappy if it's available
    if snappy is not None:
        config["compression"].setdefault("algorithm", "snappy")
    else:
        config["compression"].setdefault("algorithm", "lzma")
    config["compression"].setdefault("level", 0)

    # defaults for sites
    config.setdefault("backup_sites", {})
    for site_name, site_config in config["backup_sites"].items():
        site_config.setdefault("active", True)
        site_config.setdefault("active_backup_mode", "pg_receivexlog")
        site_config.setdefault("basebackup_count", 2)
        site_config.setdefault("basebackup_interval_hours", 24)
        site_config.setdefault(
            "basebackup_mode",
            "pipe" if site_config.get("stream_compression") else "basic")
        site_config.setdefault("encryption_key_id", None)
        site_config.setdefault("object_storage", None)

        # NOTE: pg_data_directory doesn't have a default value
        data_dir = site_config.get("pg_data_directory")
        if not data_dir and site_config["basebackup_mode"] == "local-tar":
            raise InvalidConfigurationError(
                "Site {!r}: pg_data_directory must be set to use `local-tar` backup mode"
                .format(site_name))

        # Read the PG_VERSION file from the data directory if available; used
        # below to pick version-specific binaries.
        version_file = os.path.join(data_dir, "PG_VERSION") if data_dir else None
        if version_file and os.path.exists(version_file):
            with open(version_file, "r") as fp:
                site_config["pg_data_directory_version"] = fp.read().strip()
        else:
            site_config["pg_data_directory_version"] = None

        # FIXME: pg_xlog_directory has historically had a default value, but we should probably get rid of it
        # as an incorrect value here will have unfortunate consequences. Also, since we now have a
        # pg_data_directory configuration option we should just generate pg_xlog directory based on it. But
        # while we have a separate pg_xlog directory, and while we have a default value for it, we'll still
        # base it on pg_data_directory if it was set.
        if not data_dir:
            data_dir = "/var/lib/pgsql/data"
        site_config.setdefault("pg_xlog_directory", os.path.join(data_dir, "pg_xlog"))

        # Validate object storage configuration (a site may legitimately have
        # no object storage configured at all).
        obj_store = site_config["object_storage"] or {}
        if not obj_store:
            pass
        elif "storage_type" not in obj_store:
            raise InvalidConfigurationError(
                "Site {!r}: storage_type not defined for object_storage".
                format(site_name))
        elif obj_store["storage_type"] == "local" and obj_store.get(
                "directory") == config.get("backup_location"):
            raise InvalidConfigurationError(
                "Site {!r}: invalid 'local' target directory {!r}, must be different from 'backup_location'"
                .format(site_name, config.get("backup_location")))
        else:
            # Make sure the selected transfer backend can actually be imported
            try:
                get_class_for_transfer(obj_store["storage_type"])
            except ImportError as ex:
                raise InvalidConfigurationError(
                    "Site {0!r} object_storage: {1.__class__.__name__!s}: {1!s}"
                    .format(site_name, ex))

        # Set command paths and check their versions per site. We use a configured value if one was provided
        # (either at top level or per site), if it wasn't provided but we have a valid pg_data_directory with
        # PG_VERSION in it we'll look for commands for that version from the expected paths for Debian and
        # RHEL/Fedora PGDG packages or otherwise fall back to iterating over the available versions.
        # Instead of setting paths explicitly for both commands, it's also possible to just set the
        # pg_bin_directory to point to the version-specific bin directory.
        bin_dir = site_config.get("pg_bin_directory")
        for command in ["pg_basebackup", "pg_receivexlog"]:
            command_key = "{}_path".format(command)
            command_path = site_config.get(command_key) or config.get(command_key)
            if not command_path:
                command_path = os.path.join(bin_dir, command) if bin_dir else None
                if not command_path or not os.path.exists(command_path):
                    pg_version = site_config["pg_data_directory_version"]
                    command_path = find_pg_binary(command, [pg_version] if pg_version else None)
            site_config[command_key] = command_path

            # Only verify the binary exists and probe its version for active sites
            if check_commands and site_config["active"]:
                if not command_path or not os.path.exists(command_path):
                    raise InvalidConfigurationError(
                        "Site {!r} command {!r} not found".format(site_name, command))
                version_output = subprocess.check_output([command_path, "--version"])
                version_string = version_output.decode("ascii").strip()
                site_config[command + "_version"] = convert_pg_command_version_to_number(version_string)
            else:
                site_config[command + "_version"] = None

    return config
def set_and_check_config_defaults(config, *, check_commands=True, check_pgdata=True):
    """Fill in default values for all optional configuration keys, validate
    per-site object storage settings and resolve PostgreSQL command paths.

    Mutates ``config`` in place and returns the same dict.

    :param config: top-level pghoard configuration dict
    :param check_commands: when True, run ``--version`` on the resolved
        pg_basebackup/pg_receivexlog binaries for every active site
    :param check_pgdata: when True, require ``pg_data_directory`` for every
        site and read its ``PG_VERSION`` file
    :raises InvalidConfigurationError: on invalid object storage settings,
        missing pg_data_directory, or missing commands
    """
    # TODO: consider implementing a real configuration schema at some point
    # misc global defaults
    config.setdefault("backup_location", None)
    config.setdefault("http_address", PGHOARD_HOST)
    config.setdefault("http_port", PGHOARD_PORT)
    # NOTE: backup_location must be defaulted before this line reads it
    config.setdefault("alert_file_dir", config.get("backup_location") or os.getcwd())
    config.setdefault("json_state_file_path", "/var/lib/pghoard/pghoard_state.json")
    config.setdefault("maintenance_mode_file", "/var/lib/pghoard/maintenance_mode_file")
    config.setdefault("log_level", "INFO")
    config.setdefault("path_prefix", "")  # deprecated, used in the default path for sites
    config.setdefault("upload_retries_warning_limit", 3)

    # default to cpu_count + 1 compression threads
    config.setdefault("compression", {}).setdefault(
        "thread_count",
        get_cpu_count() + 1,
    )
    # default to cpu_count + 3 transfer threads (max 20)
    config.setdefault("transfer", {}).setdefault(
        "thread_count",
        min(get_cpu_count() + 3, 20),
    )
    # default to prefetching transfer.thread_count objects
    config.setdefault("restore_prefetch", config["transfer"]["thread_count"])
    # if compression algorithm is not explicitly set prefer snappy if it's available
    if snappy is not None:
        config["compression"].setdefault("algorithm", "snappy")
    else:
        config["compression"].setdefault("algorithm", "lzma")
    config["compression"].setdefault("level", 0)

    # defaults for sites
    config.setdefault("backup_sites", {})
    for site_name, site_config in config["backup_sites"].items():
        site_config.setdefault("active", True)
        site_config.setdefault("active_backup_mode", "pg_receivexlog")
        site_config.setdefault("basebackup_chunk_size", 1024 * 1024 * 1024 * 2)  # 2 GiB chunks
        site_config.setdefault("basebackup_chunks_in_progress", 5)
        site_config.setdefault("basebackup_count", 2)
        site_config.setdefault("basebackup_interval_hours", 24)
        # NOTE: stream_compression removed from documentation after 1.6.0 release
        site_config.setdefault(
            "basebackup_mode",
            "pipe" if site_config.get("stream_compression") else "basic")
        site_config.setdefault("encryption_key_id", None)
        site_config.setdefault("object_storage", None)
        # Default object storage prefix is derived from the (deprecated) global
        # path_prefix plus the site name
        site_config.setdefault("prefix", os.path.join(config["path_prefix"], site_name))

        # NOTE: pg_data_directory doesn't have a default value
        data_dir = site_config.get("pg_data_directory")
        if not data_dir and check_pgdata:
            raise InvalidConfigurationError(
                "Site {!r}: pg_data_directory must be set".format(site_name))

        if check_pgdata:
            # data_dir is known to be set here (checked above when check_pgdata is True)
            version_file = os.path.join(data_dir, "PG_VERSION") if data_dir else None
            # NOTE(review): no os.path.exists guard — a missing PG_VERSION file
            # raises OSError here rather than InvalidConfigurationError; confirm
            # this is intentional
            with open(version_file, "r") as fp:
                site_config["pg_data_directory_version"] = fp.read().strip()

        # Validate object storage configuration (a site may legitimately have
        # no object storage configured at all).
        obj_store = site_config["object_storage"] or {}
        if not obj_store:
            pass
        elif "storage_type" not in obj_store:
            raise InvalidConfigurationError(
                "Site {!r}: storage_type not defined for object_storage".
                format(site_name))
        elif obj_store["storage_type"] == "local" and obj_store.get(
                "directory") == config.get("backup_location"):
            raise InvalidConfigurationError(
                "Site {!r}: invalid 'local' target directory {!r}, must be different from 'backup_location'"
                .format(site_name, config.get("backup_location")))
        else:
            # Make sure the selected transfer backend can actually be imported
            try:
                get_class_for_transfer(obj_store)
            except ImportError as ex:
                raise InvalidConfigurationError(
                    "Site {0!r} object_storage: {1.__class__.__name__!s}: {1!s}"
                    .format(site_name, ex))

        # Set command paths and check their versions per site. We use a configured value if one was provided
        # (either at top level or per site), if it wasn't provided but we have a valid pg_data_directory with
        # PG_VERSION in it we'll look for commands for that version from the expected paths for Debian and
        # RHEL/Fedora PGDG packages or otherwise fall back to iterating over the available versions.
        # Instead of setting paths explicitly for both commands, it's also possible to just set the
        # pg_bin_directory to point to the version-specific bin directory.
        bin_dir = site_config.get("pg_bin_directory")
        for command in ["pg_basebackup", "pg_receivexlog"]:
            # NOTE: pg_basebackup_path and pg_receivexlog_path removed from documentation after 1.6.0 release
            command_key = "{}_path".format(command)
            command_path = site_config.get(command_key) or config.get(command_key)
            if not command_path:
                command_path = os.path.join(bin_dir, command) if bin_dir else None
                if not command_path or not os.path.exists(command_path):
                    # Only narrow the search when we actually read PG_VERSION above
                    pg_versions_to_check = None
                    if "pg_data_directory_version" in site_config:
                        pg_versions_to_check = [
                            site_config["pg_data_directory_version"]
                        ]
                    command_path, _ = find_pg_binary(command, pg_versions_to_check)
            site_config[command_key] = command_path

            # Only verify the binary exists and probe its version for active sites
            if check_commands and site_config["active"]:
                if not command_path or not os.path.exists(command_path):
                    raise InvalidConfigurationError(
                        "Site {!r} command {!r} not found from path {}".format(
                            site_name, command, command_path))
                version_output = subprocess.check_output([command_path, "--version"])
                version_string = version_output.decode("ascii").strip()
                site_config[command + "_version"] = convert_pg_command_version_to_number(version_string)
            else:
                site_config[command + "_version"] = None

    return config
def set_config_defaults(config, *, check_commands=True):
    """Populate default values for all optional configuration keys and
    validate per-site object storage settings.

    Mutates ``config`` in place and returns the same dict.

    :param config: top-level pghoard configuration dict
    :param check_commands: when True, probe pg_basebackup/pg_receivexlog
        with ``--version`` and record the parsed version numbers
    :raises InvalidConfigurationError: on invalid object storage settings
    """
    # TODO: consider implementing a real configuration schema at some point
    # Global defaults; backup_location is defaulted first because the
    # alert_file_dir default reads it.
    config.setdefault("backup_location", None)
    config.setdefault("http_address", PGHOARD_HOST)
    config.setdefault("http_port", PGHOARD_PORT)
    config.setdefault("alert_file_dir", config.get("backup_location") or os.getcwd())
    config.setdefault("json_state_file_path", "/tmp/pghoard_state.json")  # XXX: get a better default
    config.setdefault("log_level", "INFO")
    config.setdefault("path_prefix", "")
    config.setdefault("upload_retries_warning_limit", 3)

    # Resolve command paths and optionally probe their versions
    for tool in ("pg_basebackup", "pg_receivexlog"):
        tool_path = config.setdefault(tool + "_path", "/usr/bin/" + tool)
        version_number = None
        if check_commands:
            raw_output = subprocess.check_output([tool_path, "--version"])
            version_number = convert_pg_command_version_to_number(raw_output.decode("ascii").strip())
        config[tool + "_version"] = version_number

    # default to 5 compression and transfer threads
    config.setdefault("compression", {}).setdefault("thread_count", 5)
    config.setdefault("transfer", {}).setdefault("thread_count", 5)
    # default to prefetching min(#compressors, #transferagents) - 1 objects so all
    # operations where prefetching is used run fully in parallel without waiting to start
    compressor_count = config["compression"]["thread_count"]
    transfer_count = config["transfer"]["thread_count"]
    config.setdefault("restore_prefetch", min(compressor_count, transfer_count) - 1)
    # if compression algorithm is not explicitly set prefer snappy if it's available
    config["compression"].setdefault("algorithm", "snappy" if snappy is not None else "lzma")
    config["compression"].setdefault("level", 0)

    # defaults for sites
    config.setdefault("backup_sites", {})
    for site, site_conf in config["backup_sites"].items():
        # Simple static per-site defaults
        for key, value in {
                "active": True,
                "active_backup_mode": "pg_receivexlog",
                "basebackup_count": 2,
                "basebackup_interval_hours": 24,
                "encryption_key_id": None,
                "object_storage": None,
                "pg_xlog_directory": "/var/lib/pgsql/data/pg_xlog",
        }.items():
            site_conf.setdefault(key, value)
        # basebackup_mode default depends on the site's stream_compression flag
        site_conf.setdefault("basebackup_mode", "pipe" if site_conf.get("stream_compression") else "basic")

        # Validate object storage configuration (sites without object storage
        # are allowed and skipped entirely)
        storage_conf = site_conf["object_storage"] or {}
        if storage_conf:
            if "storage_type" not in storage_conf:
                raise InvalidConfigurationError("Site {!r}: storage_type not defined for object_storage".format(site))
            if storage_conf["storage_type"] == "local" and storage_conf.get("directory") == config.get("backup_location"):
                raise InvalidConfigurationError(
                    "Site {!r}: invalid 'local' target directory {!r}, must be different from 'backup_location'".format(
                        site, config.get("backup_location")))
            # Make sure the selected transfer backend can actually be imported
            try:
                get_class_for_transfer(storage_conf["storage_type"])
            except ImportError as ex:
                raise InvalidConfigurationError(
                    "Site {0!r} object_storage: {1.__class__.__name__!s}: {1!s}".format(site, ex))

    return config
def set_config_defaults(config, *, check_commands=True):
    """Fill in defaults for all optional configuration keys, validate object
    storage settings and resolve PostgreSQL command paths per site.

    Mutates ``config`` in place and returns the same dict.

    :param config: top-level pghoard configuration dict
    :param check_commands: when True, run ``--version`` on the resolved
        pg_basebackup/pg_receivexlog binaries for every active site
    :raises InvalidConfigurationError: on invalid object storage settings,
        missing pg_data_directory for ``local-tar`` mode, or missing commands
    """
    # TODO: consider implementing a real configuration schema at some point
    # misc global defaults
    config.setdefault("backup_location", None)
    config.setdefault("http_address", PGHOARD_HOST)
    config.setdefault("http_port", PGHOARD_PORT)
    # NOTE: backup_location must be defaulted before this line reads it
    config.setdefault("alert_file_dir", config.get("backup_location") or os.getcwd())
    config.setdefault("json_state_file_path", "/tmp/pghoard_state.json")  # XXX: get a better default
    config.setdefault("log_level", "INFO")
    config.setdefault("path_prefix", "")
    config.setdefault("upload_retries_warning_limit", 3)

    # default to 5 compression and transfer threads
    config.setdefault("compression", {}).setdefault("thread_count", 5)
    config.setdefault("transfer", {}).setdefault("thread_count", 5)
    # default to prefetching min(#compressors, #transferagents) - 1 objects so all
    # operations where prefetching is used run fully in parallel without waiting to start
    config.setdefault(
        "restore_prefetch",
        min(config["compression"]["thread_count"], config["transfer"]["thread_count"]) - 1
    )
    # if compression algorithm is not explicitly set prefer snappy if it's available
    if snappy is not None:
        config["compression"].setdefault("algorithm", "snappy")
    else:
        config["compression"].setdefault("algorithm", "lzma")
    config["compression"].setdefault("level", 0)

    # defaults for sites
    config.setdefault("backup_sites", {})
    for site_name, site_config in config["backup_sites"].items():
        site_config.setdefault("active", True)
        site_config.setdefault("active_backup_mode", "pg_receivexlog")
        site_config.setdefault("basebackup_count", 2)
        site_config.setdefault("basebackup_interval_hours", 24)
        site_config.setdefault("basebackup_mode", "pipe" if site_config.get("stream_compression") else "basic")
        site_config.setdefault("encryption_key_id", None)
        site_config.setdefault("object_storage", None)

        # NOTE: pg_data_directory doesn't have a default value
        data_dir = site_config.get("pg_data_directory")
        if not data_dir and site_config["basebackup_mode"] == "local-tar":
            raise InvalidConfigurationError(
                "Site {!r}: pg_data_directory must be set to use `local-tar` backup mode".format(site_name)
            )

        # Read PG_VERSION from the data directory if present; used below when
        # picking version-specific binaries
        version_file = os.path.join(data_dir, "PG_VERSION") if data_dir else None
        if version_file and os.path.exists(version_file):
            with open(version_file, "r") as fp:
                site_config["pg_data_directory_version"] = fp.read().strip()
        else:
            site_config["pg_data_directory_version"] = None

        # FIXME: pg_xlog_directory has historically had a default value, but we should probably get rid of it
        # as an incorrect value here will have unfortunate consequences. Also, since we now have a
        # pg_data_directory configuration option we should just generate pg_xlog directory based on it. But
        # while we have a separate pg_xlog directory, and while we have a default value for it, we'll still
        # base it on pg_data_directory if it was set.
        if not data_dir:
            data_dir = "/var/lib/pgsql/data"
        site_config.setdefault("pg_xlog_directory", os.path.join(data_dir, "pg_xlog"))

        # Validate object storage configuration (sites without object storage
        # are allowed)
        obj_store = site_config["object_storage"] or {}
        if not obj_store:
            pass
        elif "storage_type" not in obj_store:
            raise InvalidConfigurationError("Site {!r}: storage_type not defined for object_storage".format(site_name))
        elif obj_store["storage_type"] == "local" and obj_store.get("directory") == config.get("backup_location"):
            raise InvalidConfigurationError(
                "Site {!r}: invalid 'local' target directory {!r}, must be different from 'backup_location'".format(
                    site_name, config.get("backup_location")
                )
            )
        else:
            # Make sure the selected transfer backend can actually be imported
            try:
                get_class_for_transfer(obj_store["storage_type"])
            except ImportError as ex:
                raise InvalidConfigurationError(
                    "Site {0!r} object_storage: {1.__class__.__name__!s}: {1!s}".format(site_name, ex)
                )

        # Set command paths and check their versions per site. We use a configured value if one was provided
        # (either at top level or per site), if it wasn't provided but we have a valid pg_data_directory with
        # PG_VERSION in it we'll look for commands for that version from the expected paths for Debian and
        # RHEL/Fedora PGDG packages or otherwise fall back to iterating over the available versions.
        # Instead of setting paths explicitly for both commands, it's also possible to just set the
        # pg_bin_directory to point to the version-specific bin directory.
        bin_dir = site_config.get("pg_bin_directory")
        for command in ["pg_basebackup", "pg_receivexlog"]:
            command_key = "{}_path".format(command)
            command_path = site_config.get(command_key) or config.get(command_key)
            if not command_path:
                command_path = os.path.join(bin_dir, command) if bin_dir else None
                if not command_path or not os.path.exists(command_path):
                    pg_version = site_config["pg_data_directory_version"]
                    command_path = find_pg_binary(command, [pg_version] if pg_version else None)
            site_config[command_key] = command_path

            # Only verify the binary exists and probe its version for active sites
            if check_commands and site_config["active"]:
                if not command_path or not os.path.exists(command_path):
                    raise InvalidConfigurationError("Site {!r} command {!r} not found".format(site_name, command))
                version_output = subprocess.check_output([command_path, "--version"])
                version_string = version_output.decode("ascii").strip()
                site_config[command + "_version"] = convert_pg_command_version_to_number(version_string)
            else:
                site_config[command + "_version"] = None

    return config
def set_and_check_config_defaults(config, *, check_commands=True, check_pgdata=True):
    """Fill in defaults for all optional configuration keys, validate object
    storage settings and resolve PostgreSQL command paths per site.

    Mutates ``config`` in place and returns the same dict.

    :param config: top-level pghoard configuration dict
    :param check_commands: when True, run ``--version`` on the resolved
        pg_basebackup/pg_receivexlog binaries for every active site
    :param check_pgdata: when True, require ``pg_data_directory`` for every
        site and read its ``PG_VERSION`` file
    :raises InvalidConfigurationError: on invalid object storage settings,
        missing pg_data_directory, or missing commands
    """
    # TODO: consider implementing a real configuration schema at some point
    # misc global defaults
    config.setdefault("backup_location", None)
    config.setdefault("http_address", PGHOARD_HOST)
    config.setdefault("http_port", PGHOARD_PORT)
    # NOTE: backup_location must be defaulted before this line reads it
    config.setdefault("alert_file_dir", config.get("backup_location") or os.getcwd())
    config.setdefault("json_state_file_path", "/var/lib/pghoard/pghoard_state.json")
    config.setdefault("maintenance_mode_file", "/var/lib/pghoard/maintenance_mode_file")
    config.setdefault("log_level", "INFO")
    config.setdefault("path_prefix", "")  # deprecated, used in the default path for sites
    config.setdefault("tar_executable", "pghoard_gnutaremu")
    config.setdefault("upload_retries_warning_limit", 3)
    config.setdefault("hash_algorithm", "sha1")

    # default to cpu_count + 1 compression threads
    config.setdefault("compression", {}).setdefault(
        "thread_count",
        get_cpu_count() + 1,
    )
    # default to cpu_count + 3 transfer threads (max 20)
    config.setdefault("transfer", {}).setdefault(
        "thread_count",
        min(get_cpu_count() + 3, 20),
    )
    # With 20 processes the restoration is almost always IO bound so using more CPU
    # cores isn't typically useful and just adds general overhead.
    # Only create CPU count + 1 processes as hosts with small number of CPUs often
    # have little memory too and each restore process can use fair amount of memory.
    config.setdefault("restore_process_count", min(get_cpu_count() + 1, 20))
    # default to prefetching transfer.thread_count objects
    config.setdefault("restore_prefetch", config["transfer"]["thread_count"])
    # if compression algorithm is not explicitly set prefer snappy if it's available
    if snappy is not None:
        config["compression"].setdefault("algorithm", "snappy")
    else:
        config["compression"].setdefault("algorithm", "lzma")
    config["compression"].setdefault("level", 0)

    # defaults for sites
    config.setdefault("backup_sites", {})
    for site_name, site_config in config["backup_sites"].items():
        site_config.setdefault("active", True)
        site_config.setdefault("active_backup_mode", "pg_receivexlog")
        site_config.setdefault("basebackup_chunk_size", 1024 * 1024 * 1024 * 2)  # 2 GiB chunks
        site_config.setdefault("basebackup_chunks_in_progress", 5)
        site_config.setdefault("basebackup_count", 2)
        site_config.setdefault("basebackup_interval_hours", 24)
        # NOTE: stream_compression removed from documentation after 1.6.0 release
        site_config.setdefault("basebackup_mode", "pipe" if site_config.get("stream_compression") else "basic")
        site_config.setdefault("encryption_key_id", None)
        site_config.setdefault("object_storage", None)
        # Default object storage prefix is derived from the (deprecated) global
        # path_prefix plus the site name
        site_config.setdefault("prefix", os.path.join(config["path_prefix"], site_name))

        # NOTE: pg_data_directory doesn't have a default value
        data_dir = site_config.get("pg_data_directory")
        if not data_dir and check_pgdata:
            raise InvalidConfigurationError(
                "Site {!r}: pg_data_directory must be set".format(site_name))

        if check_pgdata:
            # data_dir is known to be set here (checked above when check_pgdata is True)
            version_file = os.path.join(data_dir, "PG_VERSION") if data_dir else None
            # NOTE(review): no os.path.exists guard — a missing PG_VERSION file
            # raises OSError here rather than InvalidConfigurationError; confirm
            # this is intentional
            with open(version_file, "r") as fp:
                site_config["pg_data_directory_version"] = fp.read().strip()

        # Validate object storage configuration (sites without object storage
        # are allowed)
        obj_store = site_config["object_storage"] or {}
        if not obj_store:
            pass
        elif "storage_type" not in obj_store:
            raise InvalidConfigurationError("Site {!r}: storage_type not defined for object_storage".format(site_name))
        elif obj_store["storage_type"] == "local" and obj_store.get("directory") == config.get("backup_location"):
            raise InvalidConfigurationError(
                "Site {!r}: invalid 'local' target directory {!r}, must be different from 'backup_location'".format(
                    site_name, config.get("backup_location")))
        else:
            # Make sure the selected transfer backend can actually be imported
            try:
                get_class_for_transfer(obj_store)
            except ImportError as ex:
                raise InvalidConfigurationError(
                    "Site {0!r} object_storage: {1.__class__.__name__!s}: {1!s}".format(site_name, ex))

        # Set command paths and check their versions per site. We use a configured value if one was provided
        # (either at top level or per site), if it wasn't provided but we have a valid pg_data_directory with
        # PG_VERSION in it we'll look for commands for that version from the expected paths for Debian and
        # RHEL/Fedora PGDG packages or otherwise fall back to iterating over the available versions.
        # Instead of setting paths explicitly for both commands, it's also possible to just set the
        # pg_bin_directory to point to the version-specific bin directory.
        bin_dir = site_config.get("pg_bin_directory")
        for command in ["pg_basebackup", "pg_receivexlog"]:
            # NOTE: pg_basebackup_path and pg_receivexlog_path removed from documentation after 1.6.0 release
            command_key = "{}_path".format(command)
            command_path = site_config.get(command_key) or config.get(command_key)
            if not command_path:
                command_path = os.path.join(bin_dir, command) if bin_dir else None
                if not command_path or not os.path.exists(command_path):
                    # Only narrow the search when we actually read PG_VERSION above
                    pg_versions_to_check = None
                    if "pg_data_directory_version" in site_config:
                        pg_versions_to_check = [site_config["pg_data_directory_version"]]
                    command_path, _ = find_pg_binary(command, pg_versions_to_check)
            site_config[command_key] = command_path

            # Only verify the binary exists and probe its version for active sites
            if check_commands and site_config["active"]:
                if not command_path or not os.path.exists(command_path):
                    raise InvalidConfigurationError("Site {!r} command {!r} not found from path {}".format(
                        site_name, command, command_path))
                version_output = subprocess.check_output([command_path, "--version"])
                version_string = version_output.decode("ascii").strip()
                site_config[command + "_version"] = convert_pg_command_version_to_number(version_string)
            else:
                site_config[command + "_version"] = None

    return config