def validate(self):
    """Check below requirements are met in setup.

    1. Check if given product is supported by SSPL
    2. Check if given setup is supported by SSPL
    3. Check if required pre-requisites softwares are installed.
    4. Validate BMC connectivity
    5. Validate storage controller connectivity

    Raises SetupError(errno.EINVAL) when product/setup is unsupported or a
    required provisioner config key is missing (get_config_value raises).
    """
    machine_id = Utility.get_machine_id()
    # Validate input/provisioner configs
    self.product = Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX,
        "cortx>release>product")
    self.setup = Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX,
        "cortx>release>setup")
    node_type = Utility.get_config_value(
        consts.PRVSNR_CONFIG_INDEX, "server_node>%s>type" % machine_id)
    if node_type.lower() not in ["vm", "virtual"]:
        # Hardware node: BMC and storage controller details are mandatory.
        # The names bound here are consumed by the reachability check below,
        # which is guarded by the same node_type condition.
        bmc_ip = Utility.get_config_value(
            consts.PRVSNR_CONFIG_INDEX,
            "server_node>%s>bmc>ip" % machine_id)
        enclosure_id = Utility.get_config_value(
            consts.PRVSNR_CONFIG_INDEX,
            "server_node>%s>storage>enclosure_id" % machine_id)
        # Presence check only; the enclosure type value is not used here.
        Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>type" % enclosure_id)
        primary_ip = Utility.get_config_value(
            consts.PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>primary>ip" % enclosure_id)
        secondary_ip = Utility.get_config_value(
            consts.PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>secondary>ip" % enclosure_id)
    # Validate product support
    if self.product not in consts.enabled_products:
        msg = "Product '%s' is not in sspl supported product list: %s" % (
            self.product, consts.enabled_products)
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)
    # Validate setup support
    if self.setup not in consts.setups:
        msg = "Setup '%s' is not in sspl supported setup list: %s" % (
            self.setup, consts.setups)
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)
    # Validate required pip3s and rpms are installed
    self.validate_dependencies(self.setup)
    # Validate BMC & Storage controller IP reachability
    if node_type.lower() not in ["vm", "virtual"]:
        # cluster_id required for decrypting the secret is only available
        # from the prepare stage. However accessibility validation will be
        # done in prepare stage. So at this time, validating ip
        # reachability is fine.
        NetworkV().validate("connectivity",
                            [bmc_ip, primary_ip, secondary_ip])
def validate(self):
    """Ensure the packages needed for running sspl tests are installed,
    then load the sspl/test configs and record node/enclosure details."""
    # RPM dependency (no version pin) and exact-version pip3 dependency.
    required_rpms = {"cortx-sspl-test": None}
    required_pip3_pkgs = {"Flask": "1.1.1"}
    fqdn = socket.getfqdn()
    validator = PkgV()
    validator.validate_pip3_pkgs(host=fqdn,
                                 pkgs=required_pip3_pkgs,
                                 skip_version_check=False)
    validator.validate_rpm_pkgs(host=fqdn,
                                pkgs=required_rpms,
                                skip_version_check=True)
    # Load global, sspl and test configs.
    Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
    Conf.load(SSPL_TEST_CONFIG_INDEX, sspl_test_config_path)
    # Truncate (or create) the copy file, then mirror the supplied
    # global config into it.
    with open(self.sspl_test_gc_copy_file, "w") as copy_file:
        copy_file.write("")
    self.sspl_test_gc_copy_url = "yaml://%s" % self.sspl_test_gc_copy_file
    Conf.load(SSPL_TEST_GLOBAL_CONFIG, self.sspl_test_gc_copy_url)
    Conf.load("global_config", self.sspl_test_gc_url)
    Conf.copy("global_config", SSPL_TEST_GLOBAL_CONFIG)
    # Pull node/enclosure details the tests rely on.
    machine_id = Utility.get_machine_id()
    self.node_type = Conf.get(SSPL_TEST_GLOBAL_CONFIG,
                              "server_node>%s>type" % machine_id)
    enclosure_id = Conf.get(
        SSPL_TEST_GLOBAL_CONFIG,
        "server_node>%s>storage>enclosure_id" % machine_id)
    self.enclosure_type = Conf.get(
        SSPL_TEST_GLOBAL_CONFIG,
        "storage_enclosure>%s>type" % enclosure_id)
def process(self): """Configure sensor monitoring and log level setting.""" # Update sspl.conf with provisioner supplied input config copy Conf.set(consts.SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>global_config_copy_url", consts.global_config_path) Conf.save(consts.SSPL_CONFIG_INDEX) if os.path.isfile(consts.SSPL_CONFIGURED): os.remove(consts.SSPL_CONFIGURED) os.makedirs(consts.SSPL_CONFIGURED_DIR, exist_ok=True) with open(consts.SSPL_CONFIGURED, 'a'): os.utime(consts.SSPL_CONFIGURED) sspl_uid = Utility.get_uid(consts.USER) sspl_gid = Utility.get_gid(consts.USER) Utility.set_ownership_recursively([consts.SSPL_CONFIGURED], sspl_uid, sspl_gid) # Get the types of server and storage we are currently running on and # enable/disable sensor groups in the conf file accordingly. update_sensor_info(consts.SSPL_CONFIG_INDEX, self.node_type, self.enclosure_type) # Message bus topic creation self.create_message_types() # Skip this step if sspl is being configured for node replacement # scenario as consul data is already # available on healthy node # Updating build requested log level if not os.path.exists(consts.REPLACEMENT_NODE_ENV_VAR_FILE): with open(f"{consts.SSPL_BASE_DIR}/low-level/files/opt/seagate" + \ "/sspl/conf/build-requested-loglevel") as f: log_level = f.read().strip() if not log_level: log_level = "INFO" if log_level in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: Conf.set(consts.SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>log_level", log_level) Conf.save(consts.SSPL_CONFIG_INDEX) else: msg = "Unexpected log level is requested, '%s'" % (log_level) logger.error(msg) raise SetupError(errno.EINVAL, msg)
def validate_group_existence(self):
    """Verify the sspl group exists, retrying briefly to allow for
    a freshly created group to become visible.

    Raises SetupError(errno.EINVAL) if the group is still missing
    after all attempts.
    """
    attempts = 3
    for attempt in range(1, attempts + 1):
        if Utility.get_gid(consts.USER) != -1:
            return
        if attempt == attempts:
            msg = "No group found with name : %s" % (consts.USER)
            logger.error(msg)
            raise SetupError(errno.EINVAL, msg)
        # Give the system a moment before the next lookup.
        time.sleep(1)
def validate(self):
    """Check for below requirement.

    1. Validate input configs
    2. Validate sspl conf is created
    3. Check if message bus is accessible
    """
    # This stage must run as root.
    if os.geteuid() != 0:
        msg = "Run this command with root privileges."
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)

    # Validate input/provisioner configs.
    node_id = Utility.get_machine_id()
    # Presence check only; the cluster id value is not used here.
    Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX,
                             "server_node>%s>cluster_id" % node_id)
    self.node_type = Utility.get_config_value(
        consts.PRVSNR_CONFIG_INDEX, "server_node>%s>type" % node_id)
    enc_id = Utility.get_config_value(
        consts.PRVSNR_CONFIG_INDEX,
        "server_node>%s>storage>enclosure_id" % node_id)
    self.enclosure_type = Utility.get_config_value(
        consts.PRVSNR_CONFIG_INDEX,
        "storage_enclosure>%s>type" % enc_id)

    # sspl.conf must already exist at this point.
    if not os.path.isfile(consts.file_store_config_path):
        msg = "Missing configuration - %s !! Create and rerun." % (
            consts.file_store_config_path)
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)

    # Constructing the client proves the message bus is accessible.
    self.mb = MessageBus()
def validate(self):
    """Ensure the global config supplies the product name required
    to restore suitable configs; raise SetupError otherwise."""
    # Validate config inputs
    from framework.utils.utility import Utility
    Conf.load(GLOBAL_CONFIG_INDEX, global_config_path)
    self.product = Utility.get_config_value(GLOBAL_CONFIG_INDEX,
                                            "cortx>release>product")
    if self.product is None:
        detail = "'Product' name is required to restore suitable configs."
        msg = "%s - validation failure. %s" % (self.name, detail)
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)
    logger.info("%s - Validation done" % self.name)
def init_logging(syslog_host="localhost", syslog_port=514, console_output=False):
    """Initialize logging for sspl-setup.

    Attaches a rotating file handler to the setup logger and, optionally,
    a stderr stream handler.
    NOTE(review): syslog_host/syslog_port are accepted but not used in
    this body — kept for interface compatibility; confirm with callers.
    """
    _logger.setLevel(logging.INFO)
    # Make sure the log file exists before attaching the handler.
    Utility().create_file(SETUP_LOG_PATH)
    log_format = ("%(asctime)s %(name)s[%(process)d]: "
                  "%(levelname)s %(message)s (%(filename)s:%(lineno)d)")
    formatter = logging.Formatter(log_format, datefmt='%b %d %H:%M:%S')
    file_handler = logging.handlers.RotatingFileHandler(
        SETUP_LOG_PATH, mode='a', maxBytes=2000000, backupCount=5)
    file_handler.setFormatter(formatter)
    _logger.addHandler(file_handler)
    if console_output:
        stream_handler = logging.StreamHandler(sys.stderr)
        stream_handler.setFormatter(formatter)
        _logger.addHandler(stream_handler)
def create_directories_and_ownership(self):
    """Create ras persistent cache directory and state file.

    Assign ownership recursively on the configured directory. The created
    state file will be used later by SSPL resourse agent(HA).

    Raises SetupError(errno.EINVAL) if the data path is unset or the sspl
    user/group does not exist.
    """
    # Extract the data path
    sspldp = Utility.get_config_value(consts.SSPL_CONFIG_INDEX,
                                      "SYSTEM_INFORMATION>data_path")
    if not sspldp:
        raise SetupError(errno.EINVAL, "Data path not set in sspl.conf")
    sspl_uid = Utility.get_uid(consts.USER)
    sspl_gid = Utility.get_gid(consts.USER)
    if sspl_uid == -1 or sspl_gid == -1:
        msg = "No user found with name : %s" % (consts.USER)
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)
    # Create sspl data directory if not exists
    os.makedirs(sspldp, exist_ok=True)
    # Create state file under sspl data directory.
    # Fix: use a context manager so the handle is closed even if the
    # open/creation path raises (previously a bare open()/close() pair).
    if not os.path.exists(self.state_file):
        with open(self.state_file, "w"):
            pass
    # Create SSPL log and bundle directories
    os.makedirs(consts.SSPL_LOG_PATH, exist_ok=True)
    os.makedirs(consts.SSPL_BUNDLE_PATH, exist_ok=True)
    # set ownership for SSPL dirs
    list_root_dir = [consts.SSPL_CONFIGURED_DIR, consts.SSPL_LOG_PATH]
    Utility.set_ownership_recursively(list_root_dir, sspl_uid, sspl_gid)
    # Create /tmp/dcs/hpi if required. Not required for '<product>' role
    if self.setup != "cortx":
        os.makedirs(consts.HPI_PATH, mode=0o777, exist_ok=True)
        zabbix_uid = Utility.get_uid("zabbix")
        if zabbix_uid != -1:
            os.chown(consts.HPI_PATH, zabbix_uid, -1)
    # Create mdadm.conf to set ACL on it.
    with open(consts.MDADM_PATH, 'a'):
        os.utime(consts.MDADM_PATH)
    os.chmod(consts.MDADM_PATH, mode=0o666)
    os.chown(consts.MDADM_PATH, sspl_uid, -1)
def validate(self):
    """Check below requirements.

    1. Validate input configs
    2. Validate BMC connectivity
    3. Validate storage controller connectivity
    4. Validate network interface availability

    On virtual nodes only the interface loop runs (over empty defaults);
    all BMC/controller/fqdn checks are hardware-only.
    """
    machine_id = Utility.get_machine_id()
    # Defaults so the interface loop below is a no-op on virtual nodes.
    mgmt_interfaces = []
    data_private_interfaces = []
    data_public_interfaces = []
    # Validate input/provisioner configs
    node_type = Utility.get_config_value(
        PRVSNR_CONFIG_INDEX, "server_node>%s>type" % machine_id)
    # Presence check; cluster_id value itself is not used below.
    cluster_id = Utility.get_config_value(
        PRVSNR_CONFIG_INDEX, "server_node>%s>cluster_id" % machine_id)
    if node_type.lower() not in ["virtual", "vm"]:
        # Hardware node: read BMC credentials, network layout and storage
        # controller credentials. Secrets are decrypted with keys derived
        # from machine/enclosure id.
        bmc_ip = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>ip" % machine_id)
        bmc_user = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>user" % machine_id)
        bmc_secret = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>secret" % machine_id)
        bmc_key = Cipher.generate_key(machine_id,
                                      ServiceTypes.SERVER_NODE.value)
        bmc_passwd = Cipher.decrypt(
            bmc_key, bmc_secret.encode("utf-8")).decode("utf-8")
        data_private_interfaces = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>private_interfaces" % machine_id)
        data_public_interfaces = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>public_interfaces" % machine_id)
        mgmt_public_fqdn = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>management>public_fqdn" % machine_id)
        mgmt_interfaces = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>management>interfaces" % machine_id)
        data_private_fqdn = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>private_fqdn" % machine_id)
        data_public_fqdn = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>public_fqdn" % machine_id)
        enclosure_id = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>storage>enclosure_id" % machine_id)
        primary_ip = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>primary>ip" % enclosure_id)
        secondary_ip = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>secondary>ip" % enclosure_id)
        cntrlr_user = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>user" % enclosure_id)
        cntrlr_secret = Utility.get_config_value(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>secret" % enclosure_id)
        cntrlr_key = Cipher.generate_key(enclosure_id,
                                         ServiceTypes.STORAGE_ENCLOSURE.value)
        cntrlr_passwd = Cipher.decrypt(
            cntrlr_key, cntrlr_secret.encode("utf-8")).decode("utf-8")
    # Validate BMC connectivity & storage controller accessibility
    if node_type.lower() not in ["virtual", "vm"]:
        NetworkV().validate("connectivity",
                            [bmc_ip, primary_ip, secondary_ip])
        BmcV().validate("accessible",
                        [socket.getfqdn(), bmc_ip, bmc_user, bmc_passwd])
        c_validator = ControllerV()
        c_validator.validate("accessible",
                             [primary_ip, cntrlr_user, cntrlr_passwd])
        c_validator.validate("accessible",
                             [secondary_ip, cntrlr_user, cntrlr_passwd])
        # Validate network fqdn reachability
        NetworkV().validate(
            "connectivity",
            [mgmt_public_fqdn, data_private_fqdn, data_public_fqdn])
    # Validate network interface availability
    for i_list in [
            mgmt_interfaces, data_private_interfaces,
            data_public_interfaces
    ]:
        self.validate_nw_cable_connection(i_list)
        self.validate_nw_interfaces(i_list)
def configure_sspl_syslog(self):
    """Configure log file path in rsyslog and update logrotate config file.

    Installs the IEM/SSPL/manifest/support-bundle rsyslog configs (copying
    each only if absent), rewrites their File= targets from sspl.conf,
    installs the logrotate configs, then restarts rsyslog.
    """
    system_files_root = "%s/low-level/files" % consts.SSPL_BASE_DIR
    sspl_log_file_path = Utility.get_config_value(consts.SSPL_CONFIG_INDEX,
        "SYSTEM_INFORMATION>sspl_log_file_path")
    # Sibling log paths are derived from the main sspl.log location.
    sspl_sb_log_file_path = sspl_log_file_path.replace(
        "/sspl.log", "/sspl_support_bundle.log")
    iem_log_file_path = Utility.get_config_value(consts.SSPL_CONFIG_INDEX,
        "IEMSENSOR>log_file_path")
    manifest_log_file_path = sspl_log_file_path.replace(
        "/sspl.log", "/manifest.log")
    # IEM configuration
    os.makedirs("%s/iem/iec_mapping" % consts.PRODUCT_BASE_DIR,
                exist_ok=True)
    distutils.dir_util.copy_tree("%s/iec_mapping/" % system_files_root,
        "%s/iem/iec_mapping" % consts.PRODUCT_BASE_DIR)
    if not os.path.exists(consts.RSYSLOG_IEM_CONF):
        shutil.copyfile("%s/%s" % (system_files_root,
            consts.RSYSLOG_IEM_CONF), consts.RSYSLOG_IEM_CONF)
    # Update log location as per sspl.conf
    Utility.replace_expr(consts.RSYSLOG_IEM_CONF, 'File.*[=,"]',
                         'File="%s"' % iem_log_file_path)
    # SSPL rsys log configuration
    if not os.path.exists(consts.RSYSLOG_SSPL_CONF):
        shutil.copyfile("%s/%s" % (system_files_root,
            consts.RSYSLOG_SSPL_CONF), consts.RSYSLOG_SSPL_CONF)
    # Update log location as per sspl.conf
    Utility.replace_expr(consts.RSYSLOG_SSPL_CONF, 'File.*[=,"]',
                         'File="%s"' % sspl_log_file_path)
    # Manifest Bundle log configuration
    if not os.path.exists(consts.RSYSLOG_MSB_CONF):
        shutil.copyfile("%s/%s" % (system_files_root,
            consts.RSYSLOG_MSB_CONF), consts.RSYSLOG_MSB_CONF)
    # Update log location as per sspl.conf
    Utility.replace_expr(consts.RSYSLOG_MSB_CONF, 'File.*[=,"]',
                         'File="%s"' % manifest_log_file_path)
    # Support Bundle log configuration
    if not os.path.exists(consts.RSYSLOG_SB_CONF):
        shutil.copyfile("%s/%s" % (system_files_root,
            consts.RSYSLOG_SB_CONF), consts.RSYSLOG_SB_CONF)
    # Update log location as per sspl.conf
    Utility.replace_expr(consts.RSYSLOG_SB_CONF, 'File.*[=,"]',
                         'File="%s"' % sspl_sb_log_file_path)
    # Configure logrotate
    # Create logrotate dir in case it's not present
    os.makedirs(consts.LOGROTATE_DIR, exist_ok=True)
    # NOTE(review): iem/sspl/sspl_sb logrotate templates get their paths
    # rewritten here, but manifest_logs is copied below without a
    # corresponding replace_expr — confirm whether that is intentional.
    Utility.replace_expr("%s/etc/logrotate.d/iem_messages"
                         % system_files_root, 0, iem_log_file_path)
    Utility.replace_expr("%s/etc/logrotate.d/sspl_logs"
                         % system_files_root, 0, sspl_log_file_path)
    Utility.replace_expr("%s/etc/logrotate.d/sspl_sb_logs"
                         % system_files_root, 0, sspl_sb_log_file_path)
    shutil.copy2("%s/etc/logrotate.d/iem_messages" % system_files_root,
                 consts.IEM_LOGROTATE_CONF)
    shutil.copy2("%s/etc/logrotate.d/sspl_logs" % system_files_root,
                 consts.SSPL_LOGROTATE_CONF)
    shutil.copy2("%s/etc/logrotate.d/manifest_logs" % system_files_root,
                 consts.MSB_LOGROTATE_CONF)
    shutil.copy2("%s/etc/logrotate.d/sspl_sb_logs" % system_files_root,
                 consts.SB_LOGROTATE_CONF)
    # This rsyslog restart will happen after successful updation of rsyslog
    # conf file and before sspl starts. If at all this will be removed from
    # here, there will be a chance that SSPL intial logs will not be present in
    # "/var/log/<product>/sspl/sspl.log" file. So, initial logs needs to be collected from
    # "/var/log/messages"
    service = DbusServiceHandler()
    service.restart('rsyslog.service')
# Configuration key-name constants used by SSPL modules.
NODE_TYPE = "node_type"
SMART_TEST_INTERVAL = "smart_test_interval"
SSPL_LOG_FILE_PATH = "sspl_log_file_path"
STORAGE = "storage"
STORAGE_SET_ID = "storage_set_id"
STORE_TYPE = "store_type"
THREADED = "threaded"
TIMESTAMP_FILE_PATH = "timestamp_file_path"
TRANSMIT_INTERVAL = "transmit_interval"
TYPE = "type"
UNITS = "units"
# NOTE(review): the two values below appear masked/redacted in this copy
# of the source — confirm the real values against the canonical file.
USER = "******"
USERNAME = "******"
VIRTUAL_HOST = "virtual_host"
TARGET_BUILD = "target_build"
# Get SRVNODE and ENCLOSURE so it can be used in other files to get
# server_node and enclosure specific config
utility = Utility()
MACHINE_ID = utility.get_machine_id()
OPERATING_SYSTEM = utility.get_os()
# Load local sspl.conf, then the global config it points at (import-time
# side effects: both configs are loaded when this module is imported).
Conf.load(SSPL_CONF, "yaml:///etc/sspl.conf")
global_config = Conf.get(SSPL_CONF,
                         "SYSTEM_INFORMATION>global_config_copy_url")
Conf.load(GLOBAL_CONF, global_config)
# NOTE(review): this indexes the server-nodes mapping by MACHINE_ID and
# then uses the result as a key segment — assumes the mapping is keyed by
# machine id and values are node names; confirm against the config schema.
SRVNODE = Conf.get(GLOBAL_CONF, f'{CLUSTER}>{SERVER_NODES}')[MACHINE_ID]
ENCLOSURE = Conf.get(GLOBAL_CONF,
                     f"{CLUSTER}>{SRVNODE}>{STORAGE}>{ENCLOSURE_ID}")
def validate(self):
    """Check for below requirement.

    1. Validate if sspl-ll user exists
    2. Validate input keys
    """
    # Check sspl-ll user exists
    if Utility.get_uid(self.user) == -1:
        msg = "User %s doesn't exist. Please add user." % self.user
        logger.error(msg)
        raise SetupError(errno.EINVAL, msg)
    # Check input/provisioner configs
    machine_id = Utility.get_machine_id()
    cluster_id = Utility.get_config_value(
        consts.PRVSNR_CONFIG_INDEX,
        "server_node>%s>cluster_id" % machine_id)
    # Presence checks for the cluster-level keys.
    for key in ("cluster>%s>name" % cluster_id,
                "cluster>%s>site_count" % cluster_id):
        Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX, key)
    storage_set_count = int(
        Utility.get_config_value(
            consts.PRVSNR_CONFIG_INDEX,
            "cluster>%s>site>storage_set_count" % cluster_id))
    # Presence checks for every storage-set entry.
    for index in range(storage_set_count):
        for key_fmt in ("cluster>%s>storage_set[%s]>name",
                        "cluster>%s>storage_set[%s]>server_nodes",
                        "cluster>%s>storage_set[%s]>storage_enclosures"):
            Utility.get_config_value(consts.PRVSNR_CONFIG_INDEX,
                                     key_fmt % (cluster_id, index))
def validate(self):
    """Check for required packages are installed.

    Ensures the exact pip3 package versions (uninstalling a conflicting
    version first), validates the rpm dependency, then loads sspl/test
    configs and records node/enclosure details.
    """
    # RPM dependency
    rpm_deps = {"cortx-sspl-test": None}
    # python 3rd party package dependency
    pip3_packages_dep = {"Flask": "1.1.1", "coverage": "5.5"}
    # coverage is only needed when test coverage is requested.
    if not self.coverage_enabled:
        pip3_packages_dep.pop("coverage")
    # Validate pip3 python pkg with required version.
    for pkg, version in pip3_packages_dep.items():
        installed_pkg = None
        uninstalled_pkg = False
        try:
            pkg_req = Requirement.parse(f"{pkg}=={version}")
            installed_pkg = working_set.find(pkg_req)
        except VersionConflict:
            # Wrong version present: remove it so the exact pin can be
            # installed below.
            cmd = f'pip3 uninstall -y {pkg}'
            _, err, ret = SimpleProcess(cmd).run()
            if ret:
                raise TestException(
                    "Failed to uninstall the pip3 pkg: %s(v%s), "
                    "due to an Error: %s" % (pkg, version, err))
            uninstalled_pkg = True
        except Exception as err:
            raise TestException("Failed at verification of pip3 pkg: %s, "
                                "due to an Error: %s" % (pkg, err))
        # Install when the package is absent or was just uninstalled.
        if not installed_pkg or uninstalled_pkg:
            cmd = f'pip3 install {pkg}=={version}'
            _, err, ret = SimpleProcess(cmd).run()
            if ret:
                raise TestException(
                    "Failed to install the pip3 pkg: %s(v%s), "
                    "due to an Error: %s" % (pkg, version, err))
        logger.info(f"Ensured Package Dependency: {pkg}(v{version}).")
    # Validate rpm dependencies
    pkg_validator = PkgV()
    pkg_validator.validate_rpm_pkgs(host=socket.getfqdn(),
                                    pkgs=rpm_deps,
                                    skip_version_check=True)
    # Load global, sspl and test configs
    Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
    Conf.load(SSPL_TEST_CONFIG_INDEX, sspl_test_config_path)
    # Take copy of supplied config passed to sspl_test and load it
    with open(self.sspl_test_gc_copy_file, "w") as f:
        f.write("")
    self.sspl_test_gc_copy_url = "yaml://%s" % self.sspl_test_gc_copy_file
    Conf.load(SSPL_TEST_GLOBAL_CONFIG, self.sspl_test_gc_copy_url)
    Conf.load("global_config", self.sspl_test_gc_url)
    Conf.copy("global_config", SSPL_TEST_GLOBAL_CONFIG)
    # Validate input configs
    machine_id = Utility.get_machine_id()
    self.node_type = Conf.get(SSPL_TEST_GLOBAL_CONFIG,
                              "server_node>%s>type" % machine_id)
    enclosure_id = Conf.get(
        SSPL_TEST_GLOBAL_CONFIG,
        "server_node>%s>storage>enclosure_id" % machine_id)
    self.enclosure_type = Conf.get(
        SSPL_TEST_GLOBAL_CONFIG,
        "storage_enclosure>%s>type" % enclosure_id)
def cleanup_log_and_config():
    """--pre-factory cleanup : Cleanup logs, config files and undo
    everything whatever was done in post-install Mini-Provisioner
    Interface.

    Removes files/directories created during post_install, deletes the
    sspl user, and deregisters the message bus topics.
    """
    Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
    sspl_log_file_path = Utility.get_config_value(
        SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>sspl_log_file_path")
    iem_log_file_path = Utility.get_config_value(
        SSPL_CONFIG_INDEX, "IEMSENSOR>log_file_path")
    message_types = [
        Utility.get_config_value(SSPL_CONFIG_INDEX,
                                 "INGRESSPROCESSOR>message_type"),
        Utility.get_config_value(SSPL_CONFIG_INDEX,
                                 "EGRESSPROCESSOR>message_type")]
    # Directories and file which needs to deleted.
    directories = [
        f'/var/{PRODUCT_FAMILY}/sspl', f'/var/{PRODUCT_FAMILY}/iem/',
        f'/var/log/{PRODUCT_FAMILY}/sspl/',
        f'/var/log/{PRODUCT_FAMILY}/iem/', '/etc/sspl-ll/',
        f'{PRODUCT_BASE_DIR}/iem/iec_mapping']
    sspl_sudoers_file = '/etc/sudoers.d/sspl'
    sspl_dbus_policy_rules = '/etc/polkit-1/rules.d/sspl-ll_dbus_policy.rules'
    sspl_dbus_policy_conf = '/etc/dbus-1/system.d/sspl-ll_dbus_policy.conf'
    sspl_service_file = '/etc/systemd/system/sspl-ll.service'
    sspl_test_backup = '/etc/sspl_tests.conf.back'
    sspl_test_file_path = '/etc/sspl_test_gc_url.yaml'
    sspl_sb_log_file_path = sspl_log_file_path.replace(
        "/sspl.log", "/sspl_support_bundle.log")
    manifest_log_file_path = sspl_log_file_path.replace(
        "/sspl.log", "/manifest.log")
    # symlinks created during post_install
    sspl_ll_cli = "/usr/bin/sspl_ll_cli"
    # Remove SSPL config other config/log files which were
    # created during post_install.
    for filepath in [
            sspl_ll_cli, sspl_test_backup, sspl_test_file_path,
            file_store_config_path, global_config_file_path,
            sspl_log_file_path, iem_log_file_path, sspl_sb_log_file_path,
            manifest_log_file_path, RSYSLOG_IEM_CONF, IEM_LOGROTATE_CONF,
            sspl_dbus_policy_conf, sspl_dbus_policy_rules,
            sspl_sudoers_file, sspl_service_file]:
        FileStore().delete(filepath)
    # Delete directories which were created during post_install.
    for directory in directories:
        FileStore().delete(directory)
    logger.info("Deleted config/log files and directories.")
    # Delete sspl-ll user
    usernames = [x[0] for x in pwd.getpwall()]
    if USER in usernames:
        _, err, rc = SimpleProcess("/usr/sbin/userdel -f %s" % USER).run()
        if rc != 0:
            # Fix: log at error level (was info) and correct the typos
            # "occurref"/"deleteing" in the message.
            logger.error("Error occurred while deleting %s user. ERROR: %s"
                         % (USER, err))
        else:
            logger.info("Deleted %s user." % USER)
    # Delete topic
    mbadmin = MessageBusAdmin(admin_id="admin")
    try:
        mbadmin.deregister_message_type(message_types)
        logger.info("Deleted kafka %s topics." % message_types)
    except MessageBusError as e:
        logger.error(f"MessageBusError occurred while deleting topic:{e}")
def _generate_cpu_data(self):
    """Create & transmit a cpu_data message as defined by the sensor
    response json schema.

    Raises a FAULT alert when cpu usage stays at/above the configured
    threshold for longer than the wait threshold, and a FAULT_RESOLVED
    alert after usage stays below the threshold for enough iterations.
    State is persisted between runs via persist_state_data().
    """
    current_time = Utility.get_current_time()
    # Notify the node sensor to update its data required for the cpu_data message
    successful = self._node_sensor.read_data("cpu_data", self._get_debug())
    if not successful:
        logger.error(
            "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")
    # Normalize the configured threshold to int/float; fall back to the
    # default when the configured value is not numeric.
    self._cpu_usage_threshold = str(self._cpu_usage_threshold)
    try:
        if self._cpu_usage_threshold.isdigit():
            self._cpu_usage_threshold = int(self._cpu_usage_threshold)
        else:
            self._cpu_usage_threshold = float(self._cpu_usage_threshold)
    except ValueError:
        # Fix: the message previously referred to
        # host_memory_usage_threshold (copy-paste from the memory
        # handler); this block validates cpu_usage_threshold.
        logger.warn(
            "CPU Usage Alert, Invalid cpu_usage_threshold value are entered in config."
        )
        # Assigning default value to _cpu_usage_threshold
        self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD
    # Restore persisted state: when the high-usage window started and how
    # many below-threshold iterations have been seen since.
    cpu_persistent_data = self.read_persistent_data('CPU_USAGE_DATA')
    if cpu_persistent_data.get('cpu_usage_time_map') is not None:
        previous_check_time = int(
            cpu_persistent_data['cpu_usage_time_map'])
    else:
        previous_check_time = int(-1)
    if cpu_persistent_data.get(
            'cpu_fault_resolved_iterations') is not None:
        fault_resolved_iters = int(
            cpu_persistent_data['cpu_fault_resolved_iterations'])
    else:
        fault_resolved_iters = 0
    # Number of consecutive low-usage cycles required before resolving.
    try:
        iteration_limit = int(self._high_cpu_usage_wait_threshold /
                              self._transmit_interval)
    except ZeroDivisionError:
        iteration_limit = 0
    self.usage_time_map['cpu'] = current_time
    if self._node_sensor.cpu_usage >= self._cpu_usage_threshold \
            and not self.high_usage['cpu']:
        if previous_check_time == -1:
            # First over-threshold observation: start the wait window.
            previous_check_time = current_time
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
        if self.usage_time_map[
                'cpu'] - previous_check_time >= self._high_cpu_usage_wait_threshold:
            self.high_usage['cpu'] = True
            self.fault_resolved_iterations['cpu'] = 0
            # Create the cpu usage data message and hand it over
            # to the egress processor to transmit
            fault_event = "CPU usage has increased to {}%, "\
                "beyond the configured threshold of {}% "\
                "for more than {} seconds.".format(
                    self._node_sensor.cpu_usage,
                    self._cpu_usage_threshold,
                    self._high_cpu_usage_wait_threshold
                )
            logger.warn(fault_event)
            # Create the cpu usage update message and hand it over to the egress processor to transmit
            cpuDataMsg = CPUdataMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.csps, self._node_sensor.idle_time,
                self._node_sensor.interrupt_time,
                self._node_sensor.iowait_time,
                self._node_sensor.nice_time,
                self._node_sensor.softirq_time,
                self._node_sensor.steal_time,
                self._node_sensor.system_time,
                self._node_sensor.user_time,
                self._node_sensor.cpu_core_data,
                self._node_sensor.cpu_usage, self.FAULT, fault_event)
            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                cpuDataMsg.set_uuid(self._uuid)
            jsonMsg = cpuDataMsg.getJson()
            self.cpu_sensor_data = jsonMsg
            self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data
            # Transmit it to message processor
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
            # Store the state to Persistent Cache.
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
    if self._node_sensor.cpu_usage < self._cpu_usage_threshold:
        if not self.high_usage['cpu']:
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
        else:
            if fault_resolved_iters < iteration_limit:
                # Count low-usage cycles before declaring resolution.
                fault_resolved_iters += 1
                self.fault_resolved_iterations[
                    'cpu'] = fault_resolved_iters
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')
            elif fault_resolved_iters >= iteration_limit:
                # Create the cpu usage data message and hand it over
                # to the egress processor to transmit
                fault_resolved_event = "CPU usage has decreased to {}%, "\
                    "lower than the configured threshold of {}%.".format(
                        self._node_sensor.cpu_usage,
                        self._cpu_usage_threshold
                    )
                logger.info(fault_resolved_event)
                # Create the cpu usage update message and hand it over to the egress processor to transmit
                cpuDataMsg = CPUdataMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.csps,
                    self._node_sensor.idle_time,
                    self._node_sensor.interrupt_time,
                    self._node_sensor.iowait_time,
                    self._node_sensor.nice_time,
                    self._node_sensor.softirq_time,
                    self._node_sensor.steal_time,
                    self._node_sensor.system_time,
                    self._node_sensor.user_time,
                    self._node_sensor.cpu_core_data,
                    self._node_sensor.cpu_usage, self.FAULT_RESOLVED,
                    fault_resolved_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    cpuDataMsg.set_uuid(self._uuid)
                jsonMsg = cpuDataMsg.getJson()
                self.cpu_sensor_data = jsonMsg
                self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data
                # Transmit it to message processor
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                # Reset alert state.
                self.high_usage['cpu'] = False
                self.usage_time_map['cpu'] = int(-1)
                self.fault_resolved_iterations['cpu'] = 0
                # Store the state to Persistent Cache.
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')
def _generate_host_update(self):
    """Create & transmit a host update message as defined by the sensor
    response json schema.

    Raises a FAULT alert when host memory usage stays at/above the
    configured threshold for longer than the wait threshold, and a
    FAULT_RESOLVED alert after usage stays below the threshold for enough
    iterations. State is persisted between runs via persist_state_data().
    """
    current_time = Utility.get_current_time()
    # Notify the node sensor to update its data required for the host_update message
    successful = self._node_sensor.read_data("host_update",
                                             self._get_debug(),
                                             self._units)
    if not successful:
        logger.error(
            "NodeDataMsgHandler, _generate_host_update was NOT successful."
        )
    # Normalize the configured threshold to int/float; fall back to the
    # default when the configured value is not numeric.
    self._host_memory_usage_threshold = str(
        self._host_memory_usage_threshold)
    try:
        if self._host_memory_usage_threshold.isdigit():
            self._host_memory_usage_threshold = int(
                self._host_memory_usage_threshold)
        else:
            self._host_memory_usage_threshold = float(
                self._host_memory_usage_threshold)
    except ValueError:
        logger.warn(
            "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
        )
        # Assigning default value to _memory_usage_threshold
        self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD
    # Restore persisted state: when the high-usage window started and how
    # many below-threshold iterations have been seen since.
    memory_persistent_data = self.read_persistent_data('MEMORY_USAGE_DATA')
    if memory_persistent_data.get('memory_usage_time_map') is not None:
        previous_check_time = int(
            memory_persistent_data['memory_usage_time_map'])
    else:
        previous_check_time = int(-1)
    if memory_persistent_data\
            .get('memory_fault_resolved_iterations') is not None:
        fault_resolved_iters = int(
            memory_persistent_data['memory_fault_resolved_iterations'])
    else:
        fault_resolved_iters = 0
    # Number of consecutive low-usage cycles required before resolving.
    try:
        iteration_limit = int(self._high_memory_usage_wait_threshold /
                              self._transmit_interval)
    except ZeroDivisionError:
        iteration_limit = 0
    self.usage_time_map['memory'] = current_time
    if self._node_sensor.total_memory["percent"] >= self._host_memory_usage_threshold \
            and not self.high_usage['memory']:
        if previous_check_time == -1:
            # First over-threshold observation: start the wait window.
            previous_check_time = current_time
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
        if self.usage_time_map[
                'memory'] - previous_check_time >= self._high_memory_usage_wait_threshold:
            self.high_usage['memory'] = True
            self.fault_resolved_iterations['memory'] = 0
            # Create the memory data message and hand it over
            # to the egress processor to transmit
            fault_event = "Host memory usage has increased to {}%,"\
                "beyond the configured threshold of {}% "\
                "for more than {} seconds.".format(
                    self._node_sensor.total_memory["percent"],
                    self._host_memory_usage_threshold,
                    self._high_memory_usage_wait_threshold
                )
            logger.warn(fault_event)
            # Fix: removed dead local `logged_in_users = []` — the message
            # below uses self._node_sensor.logged_in_users instead.
            # Create the host update message and hand it over to the egress processor to transmit
            hostUpdateMsg = HostUpdateMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.boot_time, self._node_sensor.up_time,
                self._node_sensor.uname, self._units,
                self._node_sensor.total_memory,
                self._node_sensor.logged_in_users,
                self._node_sensor.process_count,
                self._node_sensor.running_process_count, self.FAULT,
                fault_event)
            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                hostUpdateMsg.set_uuid(self._uuid)
            jsonMsg = hostUpdateMsg.getJson()
            # Transmit it to message processor
            self.host_sensor_data = jsonMsg
            self.os_sensor_type["memory_usage"] = self.host_sensor_data
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
    if self._node_sensor.total_memory[
            "percent"] < self._host_memory_usage_threshold:
        if not self.high_usage['memory']:
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
        else:
            if fault_resolved_iters < iteration_limit:
                # Count low-usage cycles before declaring resolution.
                fault_resolved_iters += 1
                self.fault_resolved_iterations[
                    'memory'] = fault_resolved_iters
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
            elif fault_resolved_iters >= iteration_limit:
                # Create the memory data message and hand it over
                # to the egress processor to transmit
                fault_resolved_event = "Host memory usage has decreased to {}%, "\
                    "lower than the configured threshold of {}%.".format(
                        self._node_sensor.total_memory["percent"],
                        self._host_memory_usage_threshold
                    )
                logger.info(fault_resolved_event)
                # Fix: removed second dead local `logged_in_users = []`.
                # Create the host update message and hand it over to the egress processor to transmit
                hostUpdateMsg = HostUpdateMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.boot_time,
                    self._node_sensor.up_time,
                    self._node_sensor.uname, self._units,
                    self._node_sensor.total_memory,
                    self._node_sensor.logged_in_users,
                    self._node_sensor.process_count,
                    self._node_sensor.running_process_count,
                    self.FAULT_RESOLVED, fault_resolved_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    hostUpdateMsg.set_uuid(self._uuid)
                jsonMsg = hostUpdateMsg.getJson()
                # Transmit it to message processor
                self.host_sensor_data = jsonMsg
                self.os_sensor_type["memory_usage"] = self.host_sensor_data
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                # Reset alert state.
                self.high_usage['memory'] = False
                self.usage_time_map['memory'] = int(-1)
                self.fault_resolved_iterations['memory'] = 0
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')