Beispiel #1
0
    def validate(self):
        """Validate that SSPL can be set up on this node.

        The checks run in this order:
        1. Read the provisioner config keys this stage relies on.
        2. Confirm the product is listed in ``consts.enabled_products``.
        3. Confirm the setup is listed in ``consts.setups``.
        4. Run ``validate_dependencies`` for the setup.
        5. On nodes whose type is neither "vm" nor "virtual", check
           connectivity to the BMC and to both storage controller IPs.

        Raises:
            SetupError: with ``errno.EINVAL`` when the product or the setup
                is not supported.
        """
        def reject(reason):
            # Log the failure, then abort validation with EINVAL.
            logger.error(reason)
            raise SetupError(errno.EINVAL, reason)

        node_id = Utility.get_machine_id()
        conf_index = consts.PRVSNR_CONFIG_INDEX
        read_conf = Utility.get_config_value

        # Input/provisioner configs. Some values are fetched without being
        # kept - presumably get_config_value fails on a missing key, which
        # is what makes the bare fetch a check (confirm against Utility).
        self.product = read_conf(conf_index, "cortx>release>product")
        self.setup = read_conf(conf_index, "cortx>release>setup")
        node_type = read_conf(conf_index, "server_node>%s>type" % node_id)
        is_physical_node = node_type.lower() not in ["vm", "virtual"]
        if is_physical_node:
            bmc_ip = read_conf(conf_index,
                               "server_node>%s>bmc>ip" % node_id)
        encl_id = read_conf(
            conf_index, "server_node>%s>storage>enclosure_id" % node_id)
        read_conf(conf_index, "storage_enclosure>%s>type" % encl_id)
        primary_ip = read_conf(
            conf_index,
            "storage_enclosure>%s>controller>primary>ip" % encl_id)
        secondary_ip = read_conf(
            conf_index,
            "storage_enclosure>%s>controller>secondary>ip" % encl_id)

        # Product support
        if self.product not in consts.enabled_products:
            reject("Product '%s' is not in sspl supported product list: %s" % (
                self.product, consts.enabled_products))

        # Setup support
        if self.setup not in consts.setups:
            reject("Setup '%s' is not in sspl supported setup list: %s" % (
                self.setup, consts.setups))

        # Required pip3 packages and rpms
        self.validate_dependencies(self.setup)

        # BMC & storage controller IP reachability.
        # cluster_id, required for decrypting the secret, only becomes
        # available in the prepare stage, and accessibility is validated
        # there. At this point only IP reachability is checked.
        if is_physical_node:
            NetworkV().validate("connectivity",
                                [bmc_ip, primary_ip, secondary_ip])
 def validate(self):
     """Check required packages and load the configs used by sspl_test.

     Validates the pip3 package (with version check) and the rpm package
     (without version check) on this host, loads the SSPL and SSPL test
     configs, copies the supplied global config into a fresh yaml file,
     and reads the node type and enclosure type for this machine into
     ``self.node_type`` and ``self.enclosure_type``.
     """
     # RPM dependency (no specific version)
     required_rpms = {"cortx-sspl-test": None}
     # Third-party python package dependency
     required_pip3_pkgs = {"Flask": "1.1.1"}

     validator = PkgV()
     validator.validate_pip3_pkgs(
         host=socket.getfqdn(),
         pkgs=required_pip3_pkgs,
         skip_version_check=False)
     validator.validate_rpm_pkgs(
         host=socket.getfqdn(),
         pkgs=required_rpms,
         skip_version_check=True)

     # Load sspl and test configs
     Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
     Conf.load(SSPL_TEST_CONFIG_INDEX, sspl_test_config_path)

     # Start from an empty copy file, then copy the supplied global
     # config into it through the Conf indexes.
     with open(self.sspl_test_gc_copy_file, "w") as gc_copy:
         gc_copy.write("")
     self.sspl_test_gc_copy_url = "yaml://%s" % self.sspl_test_gc_copy_file
     Conf.load(SSPL_TEST_GLOBAL_CONFIG, self.sspl_test_gc_copy_url)
     Conf.load("global_config", self.sspl_test_gc_url)
     Conf.copy("global_config", SSPL_TEST_GLOBAL_CONFIG)

     # Input configs for this machine
     node_id = Utility.get_machine_id()
     self.node_type = Conf.get(
         SSPL_TEST_GLOBAL_CONFIG, "server_node>%s>type" % node_id)
     encl_id = Conf.get(
         SSPL_TEST_GLOBAL_CONFIG,
         "server_node>%s>storage>enclosure_id" % node_id)
     self.enclosure_type = Conf.get(
         SSPL_TEST_GLOBAL_CONFIG, "storage_enclosure>%s>type" % encl_id)
Beispiel #3
0
    def process(self):
        """Configure sensor monitoring and the SSPL log level.

        Stores the global config path in sspl.conf, recreates the
        "configured" marker file and hands it to the SSPL user, updates
        sensor info for the current node/enclosure types, creates the
        message types and, unless the replacement-node marker file exists,
        applies the build-requested log level.

        Raises:
            SetupError: with ``errno.EINVAL`` when the requested log level
                is not one of DEBUG, INFO, WARNING, ERROR or CRITICAL.
        """
        conf_index = consts.SSPL_CONFIG_INDEX

        # Record the provisioner supplied input config copy in sspl.conf
        Conf.set(conf_index,
                 "SYSTEM_INFORMATION>global_config_copy_url",
                 consts.global_config_path)
        Conf.save(conf_index)

        # Recreate the marker file from scratch
        marker = consts.SSPL_CONFIGURED
        if os.path.isfile(marker):
            os.remove(marker)
        os.makedirs(consts.SSPL_CONFIGURED_DIR, exist_ok=True)
        with open(marker, 'a'):
            os.utime(marker)
        owner_uid = Utility.get_uid(consts.USER)
        owner_gid = Utility.get_gid(consts.USER)
        Utility.set_ownership_recursively([marker], owner_uid, owner_gid)

        # Enable/disable sensor groups in the conf file according to the
        # server and storage types we are running on.
        update_sensor_info(conf_index, self.node_type, self.enclosure_type)

        # Message bus topic creation
        self.create_message_types()

        # In the node replacement scenario the log level step is skipped,
        # as consul data is already available on the healthy node.
        if os.path.exists(consts.REPLACEMENT_NODE_ENV_VAR_FILE):
            return

        # Apply the build requested log level; an empty file means INFO.
        loglevel_file = (f"{consts.SSPL_BASE_DIR}/low-level/files/opt/seagate"
                         "/sspl/conf/build-requested-loglevel")
        with open(loglevel_file) as source:
            requested_level = source.read().strip() or "INFO"

        if requested_level not in ["DEBUG", "INFO", "WARNING", "ERROR",
                                   "CRITICAL"]:
            msg = "Unexpected log level is requested, '%s'" % (
                requested_level)
            logger.error(msg)
            raise SetupError(errno.EINVAL, msg)

        Conf.set(conf_index, "SYSTEM_INFORMATION>log_level", requested_level)
        Conf.save(conf_index)
Beispiel #4
0
 def validate_group_existence(self):
     """Ensure a group id can be resolved for ``consts.USER``.

     The lookup is tried up to three times, one second apart.

     Raises:
         SetupError: with ``errno.EINVAL`` when every attempt returns -1.
     """
     attempts = 3
     for attempt in range(1, attempts + 1):
         if Utility.get_gid(consts.USER) != -1:
             return
         if attempt == attempts:
             # Out of retries: report and give up without sleeping again.
             msg = "No group found with name : %s" % (consts.USER)
             logger.error(msg)
             raise SetupError(errno.EINVAL, msg)
         time.sleep(1)
Beispiel #5
0
    def validate(self):
        """Validate preconditions for this stage.

        1. The command runs with an effective uid of 0
        2. The required input configs can be read
        3. The sspl conf file exists
        4. A ``MessageBus`` instance can be created

        Sets ``self.node_type``, ``self.enclosure_type`` and ``self.mb``.

        Raises:
            SetupError: with ``errno.EINVAL`` when not run as root or when
                the sspl conf file is missing.
        """
        def reject(reason):
            # Log the failure, then abort validation with EINVAL.
            logger.error(reason)
            raise SetupError(errno.EINVAL, reason)

        if os.geteuid() != 0:
            reject("Run this command with root privileges.")

        # Input/provisioner configs
        conf_index = consts.PRVSNR_CONFIG_INDEX
        read_conf = Utility.get_config_value
        node_id = Utility.get_machine_id()
        # cluster_id is fetched but its value is not used here.
        read_conf(conf_index, "server_node>%s>cluster_id" % node_id)
        self.node_type = read_conf(
            conf_index, "server_node>%s>type" % node_id)
        encl_id = read_conf(
            conf_index, "server_node>%s>storage>enclosure_id" % node_id)
        self.enclosure_type = read_conf(
            conf_index, "storage_enclosure>%s>type" % encl_id)

        # sspl conf must already have been created
        conf_file = consts.file_store_config_path
        if not os.path.isfile(conf_file):
            reject("Missing configuration - %s !! Create and rerun." % (
                conf_file))

        # Message bus must be accessible
        self.mb = MessageBus()
 def validate(self):
     """Load the global config and require a product name.

     Stores the value of "cortx>release>product" in ``self.product``.

     Raises:
         SetupError: with ``errno.EINVAL`` when the product is ``None``.
     """
     # Imported here rather than at module level, as in the original code.
     from framework.utils.utility import Utility

     Conf.load(GLOBAL_CONFIG_INDEX, global_config_path)
     self.product = Utility.get_config_value(
         GLOBAL_CONFIG_INDEX, "cortx>release>product")

     if self.product is not None:
         logger.info("%s - Validation done" % self.name)
         return

     msg = "%s - validation failure. %s" % (
         self.name,
         "'Product' name is required to restore suitable configs.")
     logger.error(msg)
     raise SetupError(errno.EINVAL, msg)
Beispiel #7
0
def init_logging(syslog_host="localhost",
                 syslog_port=514,
                 console_output=False):
    """Set up the sspl-setup logger.

    The logger level is set to INFO and a rotating file handler on
    ``SETUP_LOG_PATH`` (2000000 bytes per file, 5 backups) is attached.

    Args:
        syslog_host: not used by this function body.
        syslog_port: not used by this function body.
        console_output: when true, also attach a handler writing to
            ``sys.stderr`` with the same format.
    """
    def attach(log_handler):
        # Every handler shares one formatter and goes onto the same logger.
        log_handler.setFormatter(formatter)
        _logger.addHandler(log_handler)

    _logger.setLevel(logging.INFO)
    # Make sure the log file exists before a handler opens it
    Utility().create_file(SETUP_LOG_PATH)

    file_handler = logging.handlers.RotatingFileHandler(
        SETUP_LOG_PATH, mode='a', maxBytes=2000000, backupCount=5)

    log_format = ("%(asctime)s %(name)s[%(process)d]: "
                  "%(levelname)s %(message)s (%(filename)s:%(lineno)d)")
    formatter = logging.Formatter(log_format, datefmt='%b %d %H:%M:%S')

    attach(file_handler)
    if console_output:
        attach(logging.StreamHandler(sys.stderr))
Beispiel #8
0
    def create_directories_and_ownership(self):
        """Create SSPL directories and files and assign their ownership.

        Creates the data directory from sspl.conf, the state file
        (used later by the SSPL resource agent (HA)), the log and bundle
        directories, the HPI directory for non-"cortx" setups, and
        mdadm.conf. Ownership of the configured and log directories is set
        recursively to the SSPL user.

        Raises:
            SetupError: with ``errno.EINVAL`` when the data path is not set
                or the uid/gid lookup for ``consts.USER`` returns -1.
        """
        # Data path comes from sspl.conf
        data_dir = Utility.get_config_value(
            consts.SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>data_path")
        if not data_dir:
            raise SetupError(errno.EINVAL, "Data path not set in sspl.conf")

        owner_uid = Utility.get_uid(consts.USER)
        owner_gid = Utility.get_gid(consts.USER)
        if -1 in (owner_uid, owner_gid):
            msg = "No user found with name : %s" % (consts.USER)
            logger.error(msg)
            raise SetupError(errno.EINVAL, msg)

        # Data directory, plus an empty state file if there is none yet
        os.makedirs(data_dir, exist_ok=True)
        if not os.path.exists(self.state_file):
            with open(self.state_file, "w"):
                pass

        # Log and bundle directories
        os.makedirs(consts.SSPL_LOG_PATH, exist_ok=True)
        os.makedirs(consts.SSPL_BUNDLE_PATH, exist_ok=True)

        # Ownership for SSPL dirs
        owned_roots = [consts.SSPL_CONFIGURED_DIR, consts.SSPL_LOG_PATH]
        Utility.set_ownership_recursively(owned_roots, owner_uid, owner_gid)

        # The HPI directory is not needed for the "cortx" setup. When a
        # zabbix user exists it becomes the owner; the group is untouched.
        if self.setup != "cortx":
            os.makedirs(consts.HPI_PATH, mode=0o777, exist_ok=True)
            zabbix_uid = Utility.get_uid("zabbix")
            if zabbix_uid != -1:
                os.chown(consts.HPI_PATH, zabbix_uid, -1)

        # Create mdadm.conf (if missing) so that access can be set on it.
        mdadm_conf = consts.MDADM_PATH
        with open(mdadm_conf, 'a'):
            os.utime(mdadm_conf)
        os.chmod(mdadm_conf, mode=0o666)
        os.chown(mdadm_conf, owner_uid, -1)
    def validate(self):
        """Validate provisioner inputs and hardware/network reachability.

        1. Read input configs for this node and its storage enclosure
        2. On non-virtual nodes, validate BMC connectivity and access
        3. On non-virtual nodes, validate storage controller connectivity
           and access
        4. Validate that the management and data FQDNs are reachable
        5. Validate network cable connection and interface availability
        """
        node_id = Utility.get_machine_id()
        read_conf = Utility.get_config_value
        # These stay empty on virtual nodes, so the interface checks at
        # the end receive empty lists for the data interfaces.
        private_data_ifaces = []
        public_data_ifaces = []

        # Input/provisioner configs
        node_type = read_conf(
            PRVSNR_CONFIG_INDEX, "server_node>%s>type" % node_id)
        # cluster_id is fetched but its value is not used in this method.
        read_conf(PRVSNR_CONFIG_INDEX, "server_node>%s>cluster_id" % node_id)
        on_hardware = node_type.lower() not in ["virtual", "vm"]
        if on_hardware:
            bmc_ip = read_conf(
                PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>ip" % node_id)
            bmc_user = read_conf(
                PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>user" % node_id)
            bmc_secret = read_conf(
                PRVSNR_CONFIG_INDEX, "server_node>%s>bmc>secret" % node_id)
            # The BMC secret is decrypted with a key generated from the
            # machine id.
            bmc_key = Cipher.generate_key(node_id,
                                          ServiceTypes.SERVER_NODE.value)
            bmc_passwd = Cipher.decrypt(
                bmc_key, bmc_secret.encode("utf-8")).decode("utf-8")
            private_data_ifaces = read_conf(
                PRVSNR_CONFIG_INDEX,
                "server_node>%s>network>data>private_interfaces" % node_id)
            public_data_ifaces = read_conf(
                PRVSNR_CONFIG_INDEX,
                "server_node>%s>network>data>public_interfaces" % node_id)

        mgmt_fqdn = read_conf(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>management>public_fqdn" % node_id)
        mgmt_ifaces = read_conf(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>management>interfaces" % node_id)
        private_data_fqdn = read_conf(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>private_fqdn" % node_id)
        public_data_fqdn = read_conf(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>network>data>public_fqdn" % node_id)

        encl_id = read_conf(
            PRVSNR_CONFIG_INDEX,
            "server_node>%s>storage>enclosure_id" % node_id)
        primary_ip = read_conf(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>primary>ip" % encl_id)
        secondary_ip = read_conf(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>secondary>ip" % encl_id)
        ctrl_user = read_conf(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>user" % encl_id)
        ctrl_secret = read_conf(
            PRVSNR_CONFIG_INDEX,
            "storage_enclosure>%s>controller>secret" % encl_id)
        # The controller secret is decrypted on every node type, with a
        # key generated from the enclosure id.
        ctrl_key = Cipher.generate_key(encl_id,
                                       ServiceTypes.STORAGE_ENCLOSURE.value)
        ctrl_passwd = Cipher.decrypt(
            ctrl_key, ctrl_secret.encode("utf-8")).decode("utf-8")

        # BMC connectivity & storage controller accessibility
        if on_hardware:
            NetworkV().validate("connectivity",
                                [bmc_ip, primary_ip, secondary_ip])
            BmcV().validate("accessible",
                            [socket.getfqdn(), bmc_ip, bmc_user, bmc_passwd])
            controller_check = ControllerV()
            for controller_ip in (primary_ip, secondary_ip):
                controller_check.validate(
                    "accessible", [controller_ip, ctrl_user, ctrl_passwd])

        # Network fqdn reachability
        NetworkV().validate(
            "connectivity",
            [mgmt_fqdn, private_data_fqdn, public_data_fqdn])

        # Network interface availability
        for ifaces in (mgmt_ifaces, private_data_ifaces, public_data_ifaces):
            self.validate_nw_cable_connection(ifaces)
            self.validate_nw_interfaces(ifaces)
    def configure_sspl_syslog(self):
        """Point the rsyslog and logrotate configs at the SSPL log files.

        Log locations are read from sspl.conf; the support bundle and
        manifest log paths are derived from the SSPL log path by replacing
        its "/sspl.log" suffix. Missing rsyslog conf files are copied from
        the packaged system files, the ``File`` entry of each is rewritten,
        the logrotate templates are updated and installed, and
        rsyslog.service is restarted.
        """
        system_files_root = "%s/low-level/files" % consts.SSPL_BASE_DIR
        sspl_log = Utility.get_config_value(
            consts.SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>sspl_log_file_path")
        sb_log = sspl_log.replace("/sspl.log", "/sspl_support_bundle.log")
        iem_log = Utility.get_config_value(
            consts.SSPL_CONFIG_INDEX, "IEMSENSOR>log_file_path")
        manifest_log = sspl_log.replace("/sspl.log", "/manifest.log")

        # IEM configuration: install the iec mapping files
        iec_mapping_dir = "%s/iem/iec_mapping" % consts.PRODUCT_BASE_DIR
        os.makedirs(iec_mapping_dir, exist_ok=True)
        distutils.dir_util.copy_tree(
            "%s/iec_mapping/" % system_files_root, iec_mapping_dir)

        # rsyslog confs, in order: IEM, SSPL, manifest bundle, support
        # bundle. Each is copied from the system files when absent, then
        # its log location is updated as per sspl.conf.
        rsyslog_targets = (
            (consts.RSYSLOG_IEM_CONF, iem_log),
            (consts.RSYSLOG_SSPL_CONF, sspl_log),
            (consts.RSYSLOG_MSB_CONF, manifest_log),
            (consts.RSYSLOG_SB_CONF, sb_log),
        )
        for conf_path, log_path in rsyslog_targets:
            if not os.path.exists(conf_path):
                shutil.copyfile(
                    "%s/%s" % (system_files_root, conf_path), conf_path)
            Utility.replace_expr(
                conf_path, 'File.*[=,"]', 'File="%s"' % log_path)

        # Configure logrotate; create its directory in case it's absent.
        os.makedirs(consts.LOGROTATE_DIR, exist_ok=True)
        iem_rotate = "%s/etc/logrotate.d/iem_messages" % system_files_root
        sspl_rotate = "%s/etc/logrotate.d/sspl_logs" % system_files_root
        sb_rotate = "%s/etc/logrotate.d/sspl_sb_logs" % system_files_root
        manifest_rotate = "%s/etc/logrotate.d/manifest_logs" % system_files_root

        # replace_expr is given 0 as its expression argument here -
        # presumably a line index; confirm against Utility.replace_expr.
        # NOTE(review): the manifest_logs template is installed below
        # without such an update, unlike the other three - confirm this is
        # intended.
        for template, log_path in ((iem_rotate, iem_log),
                                   (sspl_rotate, sspl_log),
                                   (sb_rotate, sb_log)):
            Utility.replace_expr(template, 0, log_path)

        for template, installed_conf in (
                (iem_rotate, consts.IEM_LOGROTATE_CONF),
                (sspl_rotate, consts.SSPL_LOGROTATE_CONF),
                (manifest_rotate, consts.MSB_LOGROTATE_CONF),
                (sb_rotate, consts.SB_LOGROTATE_CONF)):
            shutil.copy2(template, installed_conf)

        # rsyslog is restarted after the conf files are updated and before
        # sspl starts. Without this restart, SSPL's initial logs may be
        # missing from "/var/log/<product>/sspl/sspl.log" and would have to
        # be collected from "/var/log/messages" instead.
        DbusServiceHandler().restart('rsyslog.service')
Beispiel #11
0
# Key-name string constants (this view starts partway through the list).
NODE_TYPE = "node_type"
SMART_TEST_INTERVAL = "smart_test_interval"
SSPL_LOG_FILE_PATH = "sspl_log_file_path"
STORAGE = "storage"
STORAGE_SET_ID = "storage_set_id"
STORE_TYPE = "store_type"
THREADED = "threaded"
TIMESTAMP_FILE_PATH = "timestamp_file_path"
TRANSMIT_INTERVAL = "transmit_interval"
TYPE = "type"
UNITS = "units"
# NOTE(review): the values of USER and USERNAME look masked ("******") -
# confirm the intended key names before relying on them.
USER = "******"
USERNAME = "******"
VIRTUAL_HOST = "virtual_host"
TARGET_BUILD = "target_build"

# Everything below runs at import time.
# Get SRVNODE and ENCLOSURE so they can be used in other files to get
# server_node and enclosure specific config.
utility = Utility()
MACHINE_ID = utility.get_machine_id()
OPERATING_SYSTEM = utility.get_os()

# Load sspl.conf, then load the global config from the URL stored in it
# under SYSTEM_INFORMATION>global_config_copy_url.
Conf.load(SSPL_CONF, "yaml:///etc/sspl.conf")
global_config = Conf.get(SSPL_CONF,
                         "SYSTEM_INFORMATION>global_config_copy_url")
Conf.load(GLOBAL_CONF, global_config)

# SRVNODE is this machine's entry in the server-nodes value, indexed by
# machine id (presumably a mapping; the subscript raises if the entry is
# absent). ENCLOSURE is the enclosure id configured for that server node.
SRVNODE = Conf.get(GLOBAL_CONF, f'{CLUSTER}>{SERVER_NODES}')[MACHINE_ID]
ENCLOSURE = Conf.get(GLOBAL_CONF,
                     f"{CLUSTER}>{SRVNODE}>{STORAGE}>{ENCLOSURE_ID}")
    def validate(self):
        """Validate the SSPL user and the cluster input keys.

        1. The user named by ``self.user`` must exist
        2. The cluster and storage-set keys are fetched from the
           provisioner config (their values are not kept)

        Raises:
            SetupError: with ``errno.EINVAL`` when the uid lookup for
                ``self.user`` returns -1.
        """
        # The user must exist
        if Utility.get_uid(self.user) == -1:
            msg = "User %s doesn't exist. Please add user." % self.user
            logger.error(msg)
            raise SetupError(errno.EINVAL, msg)

        # Input/provisioner configs
        conf_index = consts.PRVSNR_CONFIG_INDEX
        read_conf = Utility.get_config_value
        node_id = Utility.get_machine_id()
        cluster_id = read_conf(
            conf_index, "server_node>%s>cluster_id" % node_id)
        read_conf(conf_index, "cluster>%s>name" % cluster_id)
        read_conf(conf_index, "cluster>%s>site_count" % cluster_id)
        set_count = int(read_conf(
            conf_index, "cluster>%s>site>storage_set_count" % cluster_id))

        # Per storage set: name, server nodes and storage enclosures
        per_set_keys = (
            "cluster>%s>storage_set[%s]>name",
            "cluster>%s>storage_set[%s]>server_nodes",
            "cluster>%s>storage_set[%s]>storage_enclosures",
        )
        for set_index in range(set_count):
            for key_template in per_set_keys:
                read_conf(conf_index, key_template % (cluster_id, set_index))
Beispiel #13
0
    def validate(self):
        """Ensure test dependencies are present and load the test configs.

        For each required pip3 package the exact version is looked up in
        the working set: a conflicting version is uninstalled, and a
        missing (or just uninstalled) package is installed with pip3. The
        rpm dependency is then validated without a version check, the SSPL
        and test configs are loaded, the supplied global config is copied,
        and ``self.node_type`` / ``self.enclosure_type`` are read for this
        machine.

        Raises:
            TestException: when the package lookup fails unexpectedly, or
                when a pip3 uninstall/install command returns non-zero.
        """
        # RPM dependency (no specific version)
        required_rpms = {"cortx-sspl-test": None}
        # Third-party python packages; coverage only when it is enabled
        required_pip3_pkgs = {"Flask": "1.1.1"}
        if self.coverage_enabled:
            required_pip3_pkgs["coverage"] = "5.5"

        # Validate pip3 python pkg with required version.
        for name, wanted in required_pip3_pkgs.items():
            found = None
            removed_conflict = False
            try:
                found = working_set.find(
                    Requirement.parse(f"{name}=={wanted}"))
            except VersionConflict:
                # A different version is installed: remove it first.
                _, err, ret = SimpleProcess(
                    f'pip3 uninstall -y {name}').run()
                if ret:
                    raise TestException(
                        "Failed to uninstall the pip3 pkg: %s(v%s), "
                        "due to an Error: %s" % (name, wanted, err))
                removed_conflict = True
            except Exception as err:
                raise TestException("Failed at verification of pip3 pkg: %s, "
                                    "due to an Error: %s" % (name, err))

            if not found or removed_conflict:
                _, err, ret = SimpleProcess(
                    f'pip3 install {name}=={wanted}').run()
                if ret:
                    raise TestException(
                        "Failed to install the pip3 pkg: %s(v%s), "
                        "due to an Error: %s" % (name, wanted, err))
            logger.info(f"Ensured Package Dependency: {name}(v{wanted}).")

        # Validate rpm dependencies
        PkgV().validate_rpm_pkgs(host=socket.getfqdn(),
                                 pkgs=required_rpms,
                                 skip_version_check=True)

        # Load sspl and test configs
        Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
        Conf.load(SSPL_TEST_CONFIG_INDEX, sspl_test_config_path)

        # Start from an empty copy file, then copy the supplied global
        # config into it through the Conf indexes.
        with open(self.sspl_test_gc_copy_file, "w") as gc_copy:
            gc_copy.write("")
        self.sspl_test_gc_copy_url = "yaml://%s" % self.sspl_test_gc_copy_file
        Conf.load(SSPL_TEST_GLOBAL_CONFIG, self.sspl_test_gc_copy_url)
        Conf.load("global_config", self.sspl_test_gc_url)
        Conf.copy("global_config", SSPL_TEST_GLOBAL_CONFIG)

        # Input configs for this machine
        node_id = Utility.get_machine_id()
        self.node_type = Conf.get(
            SSPL_TEST_GLOBAL_CONFIG, "server_node>%s>type" % node_id)
        encl_id = Conf.get(
            SSPL_TEST_GLOBAL_CONFIG,
            "server_node>%s>storage>enclosure_id" % node_id)
        self.enclosure_type = Conf.get(
            SSPL_TEST_GLOBAL_CONFIG, "storage_enclosure>%s>type" % encl_id)
    def cleanup_log_and_config():
        """Pre-factory cleanup: undo what post-install created.

        Removes SSPL log/config files and directories, deletes the SSPL
        user if present, and deregisters the ingress/egress message types.
        A failed user deletion or a ``MessageBusError`` is logged at error
        level and does not abort the cleanup.

        NOTE(review): the function takes no ``self``; presumably it is
        decorated as a staticmethod outside this view - confirm.
        """
        Conf.load(SSPL_CONFIG_INDEX, sspl_config_path)
        sspl_log_file_path = Utility.get_config_value(
            SSPL_CONFIG_INDEX, "SYSTEM_INFORMATION>sspl_log_file_path")
        iem_log_file_path = Utility.get_config_value(
            SSPL_CONFIG_INDEX, "IEMSENSOR>log_file_path")
        message_types = [
            Utility.get_config_value(SSPL_CONFIG_INDEX, "INGRESSPROCESSOR>message_type"),
            Utility.get_config_value(SSPL_CONFIG_INDEX, "EGRESSPROCESSOR>message_type")]

        # Directories which need to be deleted.
        directories = [
            f'/var/{PRODUCT_FAMILY}/sspl', f'/var/{PRODUCT_FAMILY}/iem/',
            f'/var/log/{PRODUCT_FAMILY}/sspl/', f'/var/log/{PRODUCT_FAMILY}/iem/',
            '/etc/sspl-ll/', f'{PRODUCT_BASE_DIR}/iem/iec_mapping']

        sspl_sudoers_file = '/etc/sudoers.d/sspl'
        sspl_dbus_policy_rules = '/etc/polkit-1/rules.d/sspl-ll_dbus_policy.rules'
        sspl_dbus_policy_conf = '/etc/dbus-1/system.d/sspl-ll_dbus_policy.conf'
        sspl_service_file = '/etc/systemd/system/sspl-ll.service'
        sspl_test_backup = '/etc/sspl_tests.conf.back'
        sspl_test_file_path = '/etc/sspl_test_gc_url.yaml'
        # Sibling log files live next to sspl.log.
        sspl_sb_log_file_path = sspl_log_file_path.replace(
            "/sspl.log", "/sspl_support_bundle.log")
        manifest_log_file_path = sspl_log_file_path.replace(
            "/sspl.log", "/manifest.log")

        # symlinks created during post_install
        sspl_ll_cli = "/usr/bin/sspl_ll_cli"

        # Remove SSPL config and other config/log files which were
        # created during post_install.
        for filepath in [
                sspl_ll_cli, sspl_test_backup, sspl_test_file_path,
                file_store_config_path, global_config_file_path,
                sspl_log_file_path, iem_log_file_path, sspl_sb_log_file_path,
                manifest_log_file_path, RSYSLOG_IEM_CONF, IEM_LOGROTATE_CONF,
                sspl_dbus_policy_conf, sspl_dbus_policy_rules,
                sspl_sudoers_file, sspl_service_file]:
            FileStore().delete(filepath)

        # Delete directories which were created during post_install.
        for directory in directories:
            FileStore().delete(directory)
        logger.info("Deleted config/log files and directories.")

        # Delete the SSPL user, if it exists.
        usernames = [x[0] for x in pwd.getpwall()]
        if USER in usernames:
            _, err, rc = SimpleProcess("/usr/sbin/userdel -f %s" % USER).run()
            if rc != 0:
                # A failed userdel is an error, not an informational event.
                logger.error(
                    "Error occurred while deleting %s user. ERROR: %s"
                    % (USER, err))
            else:
                logger.info("Deleted %s user." % USER)

        # Delete topic
        mbadmin = MessageBusAdmin(admin_id="admin")
        try:
            mbadmin.deregister_message_type(message_types)
            logger.info("Delete kafka %s topics." % message_types)
        except MessageBusError as e:
            logger.error(f"MessageBusError occurred while deleting topic:{e}")
Beispiel #15
0
    def _generate_cpu_data(self):
        """Create & transmit a cpu_data message as defined
            by the sensor response json schema"""

        current_time = Utility.get_current_time()

        # Notify the node sensor to update its data required for the cpu_data message
        successful = self._node_sensor.read_data("cpu_data", self._get_debug())
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")

        self._cpu_usage_threshold = str(self._cpu_usage_threshold)
        try:
            if self._cpu_usage_threshold.isdigit():
                self._cpu_usage_threshold = int(self._cpu_usage_threshold)
            else:
                self._cpu_usage_threshold = float(self._cpu_usage_threshold)
        except ValueError:
            logger.warn(
                "CPU Usage Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _cpu_usage_threshold
            self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD

        cpu_persistent_data = self.read_persistent_data('CPU_USAGE_DATA')
        if cpu_persistent_data.get('cpu_usage_time_map') is not None:
            previous_check_time = int(
                cpu_persistent_data['cpu_usage_time_map'])
        else:
            previous_check_time = int(-1)
        if cpu_persistent_data.get(
                'cpu_fault_resolved_iterations') is not None:
            fault_resolved_iters = int(
                cpu_persistent_data['cpu_fault_resolved_iterations'])
        else:
            fault_resolved_iters = 0
        try:
            iteration_limit = int(self._high_cpu_usage_wait_threshold /
                                  self._transmit_interval)
        except ZeroDivisionError:
            iteration_limit = 0
        self.usage_time_map['cpu'] = current_time

        if self._node_sensor.cpu_usage >= self._cpu_usage_threshold \
           and not self.high_usage['cpu']:
            if previous_check_time == -1:
                previous_check_time = current_time
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')

            if self.usage_time_map[
                    'cpu'] - previous_check_time >= self._high_cpu_usage_wait_threshold:

                self.high_usage['cpu'] = True
                self.fault_resolved_iterations['cpu'] = 0
                # Create the cpu usage data message and hand it over
                # to the egress processor to transmit
                fault_event = "CPU usage has increased to {}%, "\
                    "beyond the configured threshold of {}% "\
                    "for more than {} seconds.".format(
                        self._node_sensor.cpu_usage,
                        self._cpu_usage_threshold,
                        self._high_cpu_usage_wait_threshold
                    )
                logger.warn(fault_event)

                # Create the cpu usage update message and hand it over to the egress processor to transmit
                cpuDataMsg = CPUdataMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.csps, self._node_sensor.idle_time,
                    self._node_sensor.interrupt_time,
                    self._node_sensor.iowait_time, self._node_sensor.nice_time,
                    self._node_sensor.softirq_time,
                    self._node_sensor.steal_time,
                    self._node_sensor.system_time, self._node_sensor.user_time,
                    self._node_sensor.cpu_core_data,
                    self._node_sensor.cpu_usage, self.FAULT, fault_event)

                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    cpuDataMsg.set_uuid(self._uuid)
                jsonMsg = cpuDataMsg.getJson()
                self.cpu_sensor_data = jsonMsg
                self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

                # Transmit it to message processor
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                # Store the state to Persistent Cache.
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')

        if self._node_sensor.cpu_usage < self._cpu_usage_threshold:
            if not self.high_usage['cpu']:
                self.persist_state_data('cpu', 'CPU_USAGE_DATA')
            else:
                if fault_resolved_iters < iteration_limit:
                    fault_resolved_iters += 1
                    self.fault_resolved_iterations[
                        'cpu'] = fault_resolved_iters
                    self.persist_state_data('cpu', 'CPU_USAGE_DATA')
                elif fault_resolved_iters >= iteration_limit:

                    # Create the cpu usage data message and hand it over
                    # to the egress processor to transmit
                    fault_resolved_event = "CPU usage has decreased to {}%, "\
                        "lower than the configured threshold of {}%.".format(
                            self._node_sensor.cpu_usage,
                            self._cpu_usage_threshold
                        )
                    logger.info(fault_resolved_event)

                    # Create the cpu usage update message and hand it over to the egress processor to transmit
                    cpuDataMsg = CPUdataMsg(
                        self._node_sensor.host_id, self._epoch_time,
                        self._node_sensor.csps, self._node_sensor.idle_time,
                        self._node_sensor.interrupt_time,
                        self._node_sensor.iowait_time,
                        self._node_sensor.nice_time,
                        self._node_sensor.softirq_time,
                        self._node_sensor.steal_time,
                        self._node_sensor.system_time,
                        self._node_sensor.user_time,
                        self._node_sensor.cpu_core_data,
                        self._node_sensor.cpu_usage, self.FAULT_RESOLVED,
                        fault_resolved_event)

                    # Add in uuid if it was present in the json request
                    if self._uuid is not None:
                        cpuDataMsg.set_uuid(self._uuid)
                    jsonMsg = cpuDataMsg.getJson()
                    self.cpu_sensor_data = jsonMsg
                    self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

                    # Transmit it to message processor
                    self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                    self.high_usage['cpu'] = False
                    self.usage_time_map['cpu'] = int(-1)
                    self.fault_resolved_iterations['cpu'] = 0
                    # Store the state to Persistent Cache.
                    self.persist_state_data('cpu', 'CPU_USAGE_DATA')
Beispiel #16
0
    def _generate_host_update(self):
        """Check host memory usage and transmit a host update alert if needed.

        Asks the node sensor to refresh its "host_update" data, normalizes
        the configured memory usage threshold to a number, and compares the
        sensor's ``total_memory["percent"]`` against it:

        * A FAULT message is sent once usage has been at or above the
          threshold for at least ``_high_memory_usage_wait_threshold``
          seconds and no fault is currently flagged.
        * A FAULT_RESOLVED message is sent once usage is below the threshold
          while a fault is flagged and the persisted resolved-iteration
          counter has reached ``_high_memory_usage_wait_threshold /
          _transmit_interval``.

        Side effects: updates ``usage_time_map``, ``high_usage`` and
        ``fault_resolved_iterations`` for the 'memory' key, stores the last
        sent message in ``host_sensor_data`` / ``os_sensor_type``, writes to
        the egress message queue, and persists state under
        'MEMORY_USAGE_DATA'. Returns None.
        """

        def _transmit_host_update(alert_type, event):
            # Build the host update message for the given alert type and hand
            # it over to the egress processor to transmit.
            hostUpdateMsg = HostUpdateMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.boot_time, self._node_sensor.up_time,
                self._node_sensor.uname, self._units,
                self._node_sensor.total_memory,
                self._node_sensor.logged_in_users,
                self._node_sensor.process_count,
                self._node_sensor.running_process_count,
                alert_type, event)
            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                hostUpdateMsg.set_uuid(self._uuid)
            jsonMsg = hostUpdateMsg.getJson()
            # Remember the last message and transmit it to message processor
            self.host_sensor_data = jsonMsg
            self.os_sensor_type["memory_usage"] = self.host_sensor_data
            self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

        current_time = Utility.get_current_time()

        # Notify the node sensor to update its data required for the
        # host_update message. A failed read is only logged; evaluation
        # continues with whatever data the sensor currently holds.
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        # The configured threshold may arrive as a string; coerce it to int
        # when it is all digits, otherwise to float, and fall back to the
        # default when it cannot be parsed.
        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            # NOTE(review): `warn` is a deprecated alias on stdlib loggers;
            # kept because the logger's type is not visible here - confirm.
            logger.warn(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _host_memory_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD

        # Restore the previously persisted state; -1 / 0 mean "nothing stored".
        memory_persistent_data = self.read_persistent_data('MEMORY_USAGE_DATA')
        stored_check_time = memory_persistent_data.get('memory_usage_time_map')
        previous_check_time = (
            int(stored_check_time) if stored_check_time is not None else -1)
        stored_iters = memory_persistent_data.get(
            'memory_fault_resolved_iterations')
        fault_resolved_iters = (
            int(stored_iters) if stored_iters is not None else 0)

        # Number of below-threshold calls required before the fault is
        # reported as resolved; a zero transmit interval resolves immediately.
        try:
            iteration_limit = int(self._high_memory_usage_wait_threshold /
                                  self._transmit_interval)
        except ZeroDivisionError:
            iteration_limit = 0
        self.usage_time_map['memory'] = current_time

        memory_percent = self._node_sensor.total_memory["percent"]

        if memory_percent >= self._host_memory_usage_threshold \
           and not self.high_usage['memory']:
            if previous_check_time == -1:
                # First time usage is seen above the threshold: start the
                # wait window now and persist its start time.
                previous_check_time = current_time
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

            if self.usage_time_map['memory'] - previous_check_time \
                    >= self._high_memory_usage_wait_threshold:
                self.high_usage['memory'] = True
                self.fault_resolved_iterations['memory'] = 0
                fault_event = (
                    "Host memory usage has increased to {}%, "
                    "beyond the configured threshold of {}% "
                    "for more than {} seconds.".format(
                        memory_percent,
                        self._host_memory_usage_threshold,
                        self._high_memory_usage_wait_threshold))
                logger.warn(fault_event)

                _transmit_host_update(self.FAULT, fault_event)
                # Store the state to Persistent Cache.
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

        elif memory_percent < self._host_memory_usage_threshold:
            if not self.high_usage['memory']:
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
            elif fault_resolved_iters < iteration_limit:
                # Usage is back below the threshold but not for long enough
                # yet: count this call and keep waiting.
                fault_resolved_iters += 1
                self.fault_resolved_iterations['memory'] = fault_resolved_iters
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
            else:
                fault_resolved_event = (
                    "Host memory usage has decreased to {}%, "
                    "lower than the configured threshold of {}%.".format(
                        memory_percent,
                        self._host_memory_usage_threshold))
                logger.info(fault_resolved_event)

                _transmit_host_update(self.FAULT_RESOLVED,
                                      fault_resolved_event)
                # Reset the fault tracking state and persist it.
                self.high_usage['memory'] = False
                self.usage_time_map['memory'] = int(-1)
                self.fault_resolved_iterations['memory'] = 0
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')