def install_packages(repos, packages):
    """
    Explicitly evaluate and install or update any specific-version dependencies
    and satisfy even if that involves installing an older package than is
    already installed. Primary use case is installing lustre-modules, which
    depends on a specific kernel package.

    The caller's ``packages`` list is left untouched; the pinned
    ``name-version`` requirements are added to a private copy.

    :param repos: List of strings, yum repo names
    :param packages: List of strings, yum package names. When empty (or None)
        the yum steps and the HYD4050 check are skipped.
    :return: ``agent_error(error)`` when ``_check_HYD4050`` reports an error,
        otherwise ``agent_result_ok``
    """
    if packages:
        # Work on a copy so the versioned requirements appended below do not
        # leak back into the list owned by the caller.
        to_install = list(packages)

        yum_util("clean")

        out = yum_util("requires", enablerepo=repos, packages=to_install)
        for requirement in out.strip().split("\n"):
            # Only "<name> = <version>" requirements pin an exact version.
            match = re.match(r"([^\)/]*) = (.*)", requirement.strip())
            if match:
                require_package, require_version = match.groups()
                to_install.append("%s-%s" % (require_package, require_version))

        yum_util("install", enablerepo=repos, packages=to_install)

        error = _check_HYD4050()
        if error:
            return agent_error(error)

    ServiceControl.create("iml-update-check").start(0)

    return agent_result_ok
def _setup_ntp(self, server):
    """
    Point the ntp configuration file at ``server`` and restart ntpd.

    If no server is passed, the server already present in the configuration
    is reused; if there is none either, the user is asked which server to use
    (default "localhost").

    The legacy line marker written by previous IML manager NTP configuration
    routines is handed to ``get_configured_server`` so that NTPConfig can
    recognise those entries.

    :param server: ntp server to configure, or a falsy value to reuse/prompt
    :raises RuntimeError: when writing the configuration, adding the firewall
        rule or restarting ntpd reports an error
    """
    ntp = NTPConfig(logger=log)
    legacy_markers = ["# Added by chroma-manager\n"]
    existing_server = ntp.get_configured_server(markers=legacy_markers)

    if not server and existing_server:
        server = existing_server
        log.info("Using existing (chroma configured) ntp server: %s" % existing_server)
    elif not server:
        # Only if you haven't already set it
        server = self.get_input(msg="NTP Server", default="localhost")

    log.info("Writing ntp configuration: %s " % server)

    write_error = ntp.add(server)
    if write_error:
        log.error(
            "Failed to write ntp server (%s) to config file (%s), %s"
            % (server, ntp.CONFIG_FILE, write_error)
        )
        raise RuntimeError("Failure when writing ntp config: %s" % write_error)

    firewalld = ServiceControl.create("firewalld")
    if firewalld.running:
        firewall_error = firewall_control.add_rule("123", "udp", "ntpd")
        if firewall_error:
            log.error("firewall command failed:\n%s" % firewall_error)
            raise RuntimeError(
                "Failure when opening port in firewall for ntpd: %s" % firewall_error
            )

    log.info("Disabling chrony if active")
    chronyd = ServiceControl.create("chronyd")
    chronyd.stop(validate_time=0.5)
    chronyd.disable()

    log.info("Restarting ntp")
    ntpd = ServiceControl.create("ntpd")
    ntpd.enable()
    restart_error = ntpd.restart()
    if restart_error:
        log.error(restart_error)
        raise RuntimeError(restart_error)
def _restart_pgsql(self):
    """
    Reload the postgresql-9.6 service when it is already running, start it
    otherwise, and then enable it. Results of the service calls are not
    inspected.
    """
    pgsql = ServiceControl.create("postgresql-9.6")
    bring_up = pgsql.reload if pgsql.running else pgsql.start
    bring_up()
    pgsql.enable()
def disable_and_kill():
    """
    Log "Terminating" to the console log, then disable and stop
    iml-storage-server.target (in that order).
    """
    console_log.info("Terminating")

    target = ServiceControl.create("iml-storage-server.target")
    target.disable()
    target.stop()
def start_monitored_copytool(id):
    """
    Write the service init for, and then (re)start, the copytool monitor and
    the copytool identified by ``id``.

    :param id: copytool identifier, used to look up the copytool variables
        and to build the per-copytool service names
    :return: ``agent_error(error)`` for the first service that fails to
        (re)start, otherwise ``agent_result_ok``
    """
    # Start the monitor first so that we have a reader on the FIFO when
    # the copytool begins emitting events. Then start the copytool
    copytool_vars = _copytool_vars(id)
    init_args = (
        copytool_vars["id"],
        copytool_vars["ct_path"],
        copytool_vars["ct_arguments"],
    )

    for service_name in ("chroma-copytool-monitor", "chroma-copytool"):
        _write_service_init(service_name, *init_args)

        unit = ServiceControl.create("%s-%s" % (service_name, id))
        unit.daemon_reload()

        # A running unit is restarted, a stopped one is started.
        bring_up = unit.restart if unit.running else unit.start
        error = bring_up()
        if error:
            return agent_error(error)

    return agent_result_ok
def _stop_services(self):
    """
    Stop iml-manager.target.

    :raises RuntimeError: with the reported error when the stop call
        returns one
    """
    log.info("Stopping iml manager")

    manager_target = ServiceControl.create("iml-manager.target")
    failure = manager_target.stop(validate_time=0.5)
    if not failure:
        return

    log.error(failure)
    raise RuntimeError(failure)
def _configure_firewall(self):
    """
    When firewalld is running, open TCP ports 80 and 443 with firewall-cmd:
    first as a ``--permanent`` rule, then as a rule without that flag.
    Does nothing when firewalld is not running.
    """
    if not ServiceControl.create("firewalld").running:
        return

    for port in (80, 443):
        add_port = "--add-port={}/tcp".format(port)
        self.try_shell(["firewall-cmd", "--permanent", add_port])
        self.try_shell(["firewall-cmd", add_port])
def _configure_firewall(self):
    """
    When firewalld is running, open TCP ports 80 and 443 with firewall-cmd:
    first as a ``--permanent`` rule, then as a rule without that flag.
    Does nothing when firewalld is not running.
    """
    if not ServiceControl.create('firewalld').running:
        return

    for port in (80, 443):
        add_port = '--add-port={}/tcp'.format(port)
        self.try_shell(['firewall-cmd', '--permanent', add_port])
        self.try_shell(['firewall-cmd', add_port])
def _stop_services(self):
    """
    Stop every service named in ``self.CONTROLLED_SERVICES``, in order.

    :raises RuntimeError: on the first service whose stop call reports an
        error; remaining services are not touched
    """
    log.info("Stopping daemons")

    for name in self.CONTROLLED_SERVICES:
        failure = ServiceControl.create(name).stop()
        if failure:
            log.error(failure)
            raise RuntimeError(failure)
def _enable_services(self):
    """
    Enable every service named in ``self.CONTROLLED_SERVICES``, in order.

    :raises RuntimeError: on the first service whose enable call reports an
        error; remaining services are not touched
    """
    log.info("Enabling daemons")

    for name in self.CONTROLLED_SERVICES:
        failure = ServiceControl.create(name).enable()
        if failure:
            log.error(failure)
            raise RuntimeError(failure)
def setUp(self):
    """
    Patch ``util.platform_info`` with a CentOS 7.3 PlatformInfo, create the
    service control under test and check that it is a ServiceControlEL7.
    """
    super(TestServiceStateEL7, self).setUp()

    el7_platform = util.PlatformInfo('Linux', 'CentOS', 0.0, '7.3', 0.0, 0, '')
    platform_patch = mock.patch.object(util, 'platform_info', el7_platform)
    platform_patch.start()

    self.test = ServiceControl.create('test_service')
    self.assertEqual(type(self.test), ServiceControlEL7)
def setUp(self):
    """
    Patch ``util.platform_info`` with a CentOS 7.3 PlatformInfo, create the
    service control under test and check that it is a ServiceControlEL7.
    """
    super(TestServiceStateEL7, self).setUp()

    el7_platform = util.PlatformInfo("Linux", "CentOS", 0.0, "7.3", 0.0, 0, "")
    platform_patch = mock.patch.object(util, "platform_info", el7_platform)
    platform_patch.start()

    self.test = ServiceControl.create("test_service")
    self.assertEqual(type(self.test), ServiceControlEL7)
def _service_config(interesting_services=None):
    """Interrogate the current status of services, it should be noted that el7
    calls to systemctl through ServiceControl will redirect to chkconfig if the
    specified service is not native (SysV style init as opposed to systemd
    unit init file)

    :param interesting_services: iterable of service names to inspect; None
        (the default) is treated as "no services"
    :return: dict mapping each service name to
        ``{"enabled": <bool-ish>, "running": <bool-ish>}`` as reported by its
        ServiceControl
    """
    log.info("Checking service configuration...")

    services = {}
    # The None default is not iterable; fall back to an empty list so that
    # calling with no argument yields an empty report instead of a TypeError.
    for service_name in interesting_services or []:
        controller = ServiceControl.create(service_name)
        services[service_name] = {
            "enabled": controller.enabled,
            "running": controller.running,
        }

    return services
def _setup_grafana(self):
    """
    Enable grafana-server and start it afresh (it is stopped first when it
    is already running).

    :raises RuntimeError: when enabling or starting the service reports an
        error
    """
    # grafana needs daemon-reload before enable and start
    ServiceControlEL7.daemon_reload()

    grafana = ServiceControl.create("grafana-server")

    failure = grafana.enable()
    if not failure:
        if grafana.running:
            grafana.stop()
        failure = grafana.start()

    if failure:
        log.error(failure)
        raise RuntimeError(failure)
def _setup_pgsql(self, database, check_db_space):
    """
    Initialise and restart PostgreSQL, wait for it to answer, and create the
    database owner role and database when the database is not yet accessible.

    :param database: mapping providing at least the "USER" and "NAME" keys
    :param check_db_space: when truthy, an error from the disk space check is
        returned to the caller; the check itself always runs
    :return: the disk space error (only when ``check_db_space`` is truthy),
        otherwise None
    :raises RuntimeError: when psql still fails on the fifth probe
    """
    log.info("Setting up PostgreSQL service...")

    self._init_pgsql(database)

    pgsql = ServiceControl.create("postgresql")
    pgsql.restart()
    pgsql.enable()

    def _psql_answers():
        # Exit status 0 from a trivial psql command means the server is up.
        return self.shell(["su", "postgres", "-c", "psql -c '\\d'"])[0] == 0

    failed_probes = 0
    while not _psql_answers():
        if failed_probes >= 4:
            raise RuntimeError("Timed out waiting for PostgreSQL service to start")
        failed_probes += 1
        time.sleep(1)

    space_error = self._check_db_space(self.REQUIRED_DB_SPACE_GB)
    if check_db_space and space_error:
        return space_error

    if self._db_accessible():
        return None

    owner = database["USER"]
    log.info("Creating database owner '%s'...\n" % owner)

    # Enumerate existing roles
    _, roles_str, _ = self.try_shell(
        ["su", "postgres", "-c", "psql -t -c 'select rolname from pg_roles;'"]
    )
    stripped_lines = (line.strip() for line in roles_str.split("\n"))
    roles = [role for role in stripped_lines if role]

    # Create database['USER'] role if not found
    if owner not in roles:
        create_role = (
            "psql -c 'CREATE ROLE %s NOSUPERUSER "
            "CREATEDB NOCREATEROLE INHERIT LOGIN;'" % owner
        )
        self.try_shell(["su", "postgres", "-c", create_role])

    log.info("Creating database '%s'...\n" % database["NAME"])
    create_db = "createdb -O %s %s;" % (owner, database["NAME"])
    self.try_shell(["su", "postgres", "-c", create_db])

    return None
def _start_services(self):
    """
    Bring up every service named in ``self.CONTROLLED_SERVICES``.

    A stopped service is started. A running service is reloaded, except for
    names ending in ".target", which are left as they are.

    :raises RuntimeError: on the first start/reload call that reports an
        error
    """
    log.info("Starting daemons")

    for service in self.CONTROLLED_SERVICES:
        controller = ServiceControl.create(service)

        if not controller.running:
            error = controller.start()
        elif service.endswith(".target"):
            error = False
        else:
            error = controller.reload()

        if error:
            log.error(error)
            raise RuntimeError(error)
def stop_monitored_copytool(id):
    """
    Stop the copytool identified by ``id`` and then its monitor, removing the
    init file of each service that was stopped.

    A service is only stopped when its init file exists and it is running.

    :param id: copytool identifier used to build the service names and the
        init file names
    :return: ``agent_error(error)`` for the first service that fails to stop,
        otherwise ``agent_result_ok``
    """
    # Stop the monitor after the copytool so that we can relay the
    # unconfigure event. The copytool therefore has to come first in this
    # list (the previous order stopped the monitor first).
    for service_name in ['chroma-copytool', 'chroma-copytool-monitor']:
        service = ServiceControl.create('%s-%s' % (service_name, id))
        init_file = _init_file_name(service_name, id)

        if os.path.exists(init_file) and service.running:
            error = service.stop()

            if error:
                return agent_error(error)

            os.remove(init_file)
            service.daemon_reload()  # Finally cause the system agents to see our changes.

    return agent_result_ok
def validate(self):
    """
    Collect human-readable problems with the installation.

    Only the first failing database check is reported (accessible, then
    current, then users exist). The iml-manager.target checks are always
    made; a KeyError raised while querying it is reported as the target not
    being found.

    :return: list of error strings, empty when nothing is wrong
    """
    errors = []

    database_checks = (
        (self._db_accessible, "Cannot connect to database"),
        (self._db_current, "Database tables out of date"),
        (self._users_exist, "No user accounts exist"),
    )
    for passes, complaint in database_checks:
        if not passes():
            errors.append(complaint)
            break

    manager_target = ServiceControl.create("iml-manager.target")
    try:
        if not manager_target.enabled:
            errors.append("iml-manager.target not set to start at boot")
        if not manager_target.running:
            errors.append("iml-manager.target is not running")
    except KeyError:
        errors.append("iml-manager.target not found")

    return errors
def _start_services(self):
    """
    Start every service named in ``self.CONTROLLED_SERVICES`` and then wait
    until SupervisorStatus reports no non-running services.

    :raises RuntimeError: when a start call reports an error, or when
        services are still not running after more than 10 one-second waits
    """
    log.info("Starting daemons")

    for name in self.CONTROLLED_SERVICES:
        failure = ServiceControl.create(name).start()
        if failure:
            log.error(failure)
            raise RuntimeError(failure)

    start_timeout = 10
    waited = 0
    # Poll once a second until nothing is reported as non-running.
    while SupervisorStatus().get_non_running_services():
        time.sleep(1)
        waited += 1
        if waited > start_timeout:
            stragglers = SupervisorStatus().get_non_running_services()
            msg = "Some services failed to start: %s" % ", ".join(stragglers)
            log.error(msg)
            raise RuntimeError(msg)
def _stop_services(self):
    """
    Stop every service named in ``self.CONTROLLED_SERVICES`` and then wait
    for supervisord to go away.

    supervisord counts as stopped once querying it raises ``socket.error``.
    An xmlrpc fault (6, 'SHUTDOWN_STATE') means it is still shutting down
    and polling continues; any other fault is re-raised.

    :raises RuntimeError: when a stop call reports an error, or when
        supervisord is still reachable after the timeout
    """
    log.info("Stopping daemons")
    for service in self.CONTROLLED_SERVICES:
        controller = ServiceControl.create(service)

        error = controller.stop()
        if error:
            log.error(error)
            raise RuntimeError(error)

    # Wait for supervisord to stop running
    SUPERVISOR_STOP_TIMEOUT = 20
    t = 0
    stopped = False
    while True:
        try:
            SupervisorStatus().get_all_process_info()
        except socket.error:
            # No longer up
            stopped = True
        # "except X as e" is accepted by Python 2.6+ and Python 3, unlike the
        # old "except X, e" spelling.
        except xmlrpclib.Fault as e:
            if (e.faultCode, e.faultString) == (6, 'SHUTDOWN_STATE'):
                # Up but shutting down
                pass
            else:
                raise

        if stopped:
            break

        if t > SUPERVISOR_STOP_TIMEOUT:
            raise RuntimeError(
                "chroma-supervisor failed to stop after %s seconds" % SUPERVISOR_STOP_TIMEOUT
            )

        t += 1
        time.sleep(1)
# Copyright (c) 2018 DDN. All rights reserved. # Use of this source code is governed by a MIT-style # license that can be found in the LICENSE file. from iml_common.lib.ntp import NTPConfig from iml_common.lib.agent_rpc import agent_ok_or_error from iml_common.lib.service_control import ServiceControl ntp_service = ServiceControl.create("ntpd") chrony_service = ServiceControl.create("chronyd") def unconfigure_ntp(): """ Unconfigure the ntp client :return: Value using simple return protocol """ return configure_ntp(None) def configure_ntp(ntp_server): """ Change the ntp configuration file to use the server passed :return: Value using simple return protocol """ error = NTPConfig().add(ntp_server) if error: return error else:
""" import os from chroma_agent import config from chroma_agent import conf from chroma_agent.agent_client import AgentClient, HttpError from chroma_agent.agent_daemon import ServerProperties from chroma_agent.crypto import Crypto from chroma_agent.device_plugins.action_runner import CallbackAfterResponse from chroma_agent.log import console_log from chroma_agent.plugin_manager import ActionPluginManager, DevicePluginManager from iml_common.lib.service_control import ServiceControl from iml_common.lib.agent_rpc import agent_ok_or_error agent_service = ServiceControl.create("chroma-agent") def _service_is_running(): # returns True if running return agent_service.running def deregister_server(): conf.remove_server_url() def disable_and_kill(): console_log.info("Terminating") storage_server_target = ServiceControl.create( "iml-storage-server.target")
# Copyright (c) 2018 DDN. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

from iml_common.lib.ntp import NTPConfig
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

ntp_service = ServiceControl.create("ntpd")


def unconfigure_ntp():
    """
    Remove the ntp client configuration by configuring it with no server.

    :return: Value using simple return protocol
    """
    no_server = None
    return configure_ntp(no_server)


def configure_ntp(ntp_server):
    """
    Write ``ntp_server`` into the ntp configuration file and restart ntpd.

    :param ntp_server: server to configure, or None to unconfigure
    :return: the error from writing the configuration if there is one,
        otherwise the restart result wrapped by ``agent_ok_or_error``
    """
    write_error = NTPConfig().add(ntp_server)
    if write_error:
        return write_error

    return agent_ok_or_error(ntp_service.restart())
# Copyright (c) 2018 DDN. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

from iml_common.lib.ntp import NTPConfig
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

ntp_service = ServiceControl.create('ntpd')


def unconfigure_ntp():
    """
    Remove the ntp client configuration by configuring it with no server.

    :return: Value using simple return protocol
    """
    no_server = None
    return configure_ntp(no_server)


def configure_ntp(ntp_server):
    """
    Write ``ntp_server`` into the ntp configuration file and restart ntpd.

    :param ntp_server: server to configure, or None to unconfigure
    :return: the error from writing the configuration if there is one,
        otherwise the restart result wrapped by ``agent_ok_or_error``
    """
    write_error = NTPConfig().add(ntp_server)
    if write_error:
        return write_error

    return agent_ok_or_error(ntp_service.restart())
# Copyright (c) 2017 Intel Corporation. All rights reserved.
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file.

import os

from chroma_agent.device_plugins.syslog import SYSLOG_PORT
from iml_common.lib.agent_rpc import agent_ok_or_error
from iml_common.lib.service_control import ServiceControl

rsyslog_service = ServiceControl.create('rsyslog')


def unconfigure_rsyslog():
    """
    Modify the rsyslogd configuration to stop forwarding messages to chroma,
    by configuring an empty destination.

    :return: whatever ``_configure_rsyslog`` returns
    """
    no_destination = ""
    return _configure_rsyslog(no_destination)


def configure_rsyslog():
    """
    Modify the rsyslogd configuration to forward all messages to chroma,
    using 127.0.0.1 as the destination.

    :return: whatever ``_configure_rsyslog`` returns
    """
    local_destination = "127.0.0.1"
    return _configure_rsyslog(local_destination)
Corosync verification """ from chroma_agent.log import console_log from chroma_agent.lib.shell import AgentShell from chroma_agent.lib.corosync import CorosyncRingInterface from chroma_agent.action_plugins.manage_corosync_common import InterfaceInfo from iml_common.lib.service_control import ServiceControl from iml_common.lib.firewall_control import FirewallControl from iml_common.lib.agent_rpc import agent_error from iml_common.lib.agent_rpc import agent_ok_or_error PCS_TCP_PORT = 2224 corosync_service = ServiceControl.create("corosync") pcsd_service = ServiceControl.create("pcsd") firewall_control = FirewallControl.create() PCS_USER = "******" PCS_CLUSTER_NAME = "lustre-ha-cluster" COROSYNC_CONF_PATH = "/etc/corosync/corosync.conf" def start_corosync2(): return agent_ok_or_error(corosync_service.enable() or corosync_service.start()) def stop_corosync2(): return agent_ok_or_error(corosync_service.stop())
def corosync_running():
    """Return the ``running`` state reported for the corosync service."""
    corosync = ServiceControl.create('corosync')
    return corosync.running
it from manager) """ import os from chroma_agent import config from chroma_agent.agent_client import AgentClient, HttpError from chroma_agent.agent_daemon import ServerProperties from chroma_agent.crypto import Crypto from chroma_agent.device_plugins.action_runner import CallbackAfterResponse from chroma_agent.log import console_log from chroma_agent.plugin_manager import ActionPluginManager, DevicePluginManager from iml_common.lib.service_control import ServiceControl from iml_common.lib.agent_rpc import agent_ok_or_error agent_service = ServiceControl.create('chroma-agent') def _service_is_running(): # returns True if running return agent_service.running def _start_service(): return agent_ok_or_error(agent_service.start()) def _stop_service(): return agent_ok_or_error(agent_service.stop())
from chroma_agent.log import daemon_log from manage_corosync import start_corosync, stop_corosync from chroma_agent.lib.pacemaker import pacemaker_running from chroma_agent.lib.corosync import corosync_running from iml_common.lib.service_control import ServiceControl from iml_common.lib.agent_rpc import agent_error, agent_result_ok, agent_ok_or_error # The window of time in which we count resource monitor failures RSRC_FAIL_WINDOW = "20m" # The number of times in the above window a resource monitor can fail # before we migrate it RSRC_FAIL_MIGRATION_COUNT = "3" PACEMAKER_CONFIGURE_TIMEOUT = 120 pacemaker_service = ServiceControl.create('pacemaker') corosync_service = ServiceControl.create('corosync') def _get_cluster_size(): # you'd think there'd be a way to query the value of a property # such as "expected-quorum-votes" but there does not seem to be, so # just count nodes instead rc, stdout, stderr = AgentShell.run_old(["crm_node", "-l"]) if not stdout: return 0 n = 0 for line in stdout.rstrip().split('\n'): node_id, name, status = line.split(" ")
from os import remove
from collections import namedtuple
import errno
import re

from iml_common.lib.service_control import ServiceControl
from iml_common.lib.firewall_control import FirewallControl
from chroma_agent.lib.corosync import (
    CorosyncRingInterface,
    render_config,
    write_config_to_file,
)
from iml_common.lib.agent_rpc import agent_error, agent_result_ok, agent_ok_or_error

corosync_service = ServiceControl.create("corosync")
firewall_control = FirewallControl.create()


def start_corosync():
    """
    Start the corosync service.

    :return: the start result wrapped by ``agent_ok_or_error``
    """
    outcome = corosync_service.start()
    return agent_ok_or_error(outcome)


def stop_corosync():
    """
    Stop the corosync service.

    :return: the stop result wrapped by ``agent_ok_or_error``
    """
    outcome = corosync_service.stop()
    return agent_ok_or_error(outcome)


def restart_corosync():
    """
    Restart the corosync service.

    :return: the restart result wrapped by ``agent_ok_or_error``
    """
    outcome = corosync_service.restart()
    return agent_ok_or_error(outcome)