class TestDecisionMaker(unittest.TestCase):
    """Unit tests for the DecisionMaker class."""

    # Maps an alert's resource_type to the (entity, component) pair that the
    # test expects to be passed to store_event.
    res_to_entity_mapping = {
        "enclosure": ("enclosure", "connectivity"),
        "enclosure:fru:controller": ("enclosure", "controller")
    }
    mock_decisiondb = DecisionDB()
    _dec_maker = DecisionMaker(decisiondb=mock_decisiondb)
    json_alert_data = Json(file_path).load()
    rules_data = Json(rules_schema_path).load()
    _loop = asyncio.get_event_loop()

    def test_handle_alert(self):
        """Run handle_alert on the sample alert and verify the stored event."""
        assert self.json_alert_data is not None
        self.assertTrue(isinstance(self.json_alert_data, dict))
        self._loop.run_until_complete(
            self._dec_maker.handle_alert(self.json_alert_data))

        # Pull the fields of interest out of the sample alert.
        sensor_response = \
            self.json_alert_data["message"]["sensor_response_type"]
        res_type = sensor_response["info"]["resource_type"]
        res_id = sensor_response["info"]["resource_id"]
        host_id = sensor_response["host_id"]
        event_time = sensor_response["info"]["event_time"]
        alert_type = sensor_response["alert_type"]
        severity = sensor_response["severity"]

        # Work out the key parts the decision is expected to be stored under.
        expected_pair = self.res_to_entity_mapping[res_type]
        entity = expected_pair[0]
        component = expected_pair[1]
        entity_id = '0' if entity == "enclosure" else host_id
        component_id = host_id if res_type == "enclosure" else res_id

        # The expected action is taken from the last rule that matches both
        # alert type and severity; it stays empty when no rule matches.
        action = ''
        rules_for_type = self.rules_data[res_type]
        if rules_for_type is not None:
            matching_actions = [
                rule["action"] for rule in rules_for_type
                if alert_type == rule["alert_type"]
                and severity == rule["severity"]
            ]
            if matching_actions:
                action = matching_actions[-1]

        self.mock_decisiondb.store_event.assert_called_with(
            entity, entity_id, component, component_id, event_time, action)
def get_command(argv, permissions=None, component_cmd_dir="",
                excluded_cmds=None, hidden_cmds=None):
    """
    Parse the command line and return the command object it describes.

    :param argv: Command line tokens; argv[0] is matched against the
        available command names. "-h" is added when at most one token is
        given. The caller's sequence is not modified.
    :param permissions: Mapping keyed by command name. When non-empty, only
        commands present in it are offered, and the entry for argv[0] is
        handed to CommandParser.
    :param component_cmd_dir: Directory holding one "<command>.json" file per
        command.
    :param excluded_cmds: Extra command names to leave out, in addition to
        const.EXCLUDED_COMMANDS. Not modified.
    :param hidden_cmds: Extra command names to omit from the sub-command
        metavar, in addition to const.HIDDEN_COMMANDS. Not modified.
    :return: Result of calling the parsed namespace's `command` attribute
        with (action, remaining namespace values as a dict, args).
    """
    if permissions is None:
        permissions = {}

    # Work on private copies: appending to / extending the caller's lists
    # would make them grow on every call.
    argv = list(argv)
    excluded = list(excluded_cmds or [])
    hidden = list(hidden_cmds or [])

    if len(argv) <= 1:
        argv.append("-h")

    commands_files = os.listdir(component_cmd_dir)
    excluded.extend(const.EXCLUDED_COMMANDS)
    command_names = (
        file_name.split(".json")[0] for file_name in commands_files)
    commands = [name for name in command_names if name not in excluded]
    if permissions:
        # Keep only the commands that also appear in the permissions keys.
        commands = [name for name in commands if name in permissions]

    parser = ArgumentParser(description="Cortx cli commands")
    hidden.extend(const.HIDDEN_COMMANDS)
    metavar = set(commands).difference(hidden)
    subparsers = parser.add_subparsers(metavar=metavar)

    if argv[0] in commands:
        # Load the command's json description and register only the
        # first-level sub commands allowed by the permissions.
        cmd_from_file = Json(
            os.path.join(component_cmd_dir, f"{argv[0]}.json")).load()
        cmd_obj = CommandParser(cmd_from_file, permissions.get(argv[0], {}))
        cmd_obj._handle_main_parse(subparsers)

    namespace = parser.parse_args(argv)

    # Move command/action/args out of the namespace so that vars(namespace)
    # holds only the remaining options. The values are used from locals;
    # they are still published as module attributes because the previous
    # implementation did so and other code may read them.
    sys_module = sys.modules[__name__]
    parsed = {}
    for attr in ("command", "action", "args"):
        parsed[attr] = getattr(namespace, attr)
        setattr(sys_module, attr, parsed[attr])
        delattr(namespace, attr)
    return parsed["command"](parsed["action"], vars(namespace), parsed["args"])
import os
import sys
import unittest
import configparser
import json
import toml
import yaml

from cortx.utils.kv_store import KvStoreFactory
from cortx.utils.schema.payload import Json

# Add the directory two levels above this file to the module search path.
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

# Sample inputs are located next to this file.
dir_path = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(dir_path, 'conf_sample_json.json')
properties_file = os.path.join(dir_path, 'properties.txt')

# Sample configuration, loaded once at import time from conf_sample_json.json.
sample_config = Json(file_path).load()


def setup_and_generate_sample_files():
    """
    Write sample_config to /tmp in each supported file format.

    Produces /tmp/file.json (JSON, indent 2), /tmp/sample.yaml (YAML) and
    /tmp/document.toml (TOML).
    """
    with open(r'/tmp/file.json', 'w+') as file:
        json.dump(sample_config, file, indent=2)
    with open(r'/tmp/sample.yaml', 'w+') as file:
        yaml.dump(sample_config, file)
    with open(r'/tmp/document.toml', 'w+') as file:
        toml.dump(sample_config, file)
    # NOTE(review): p_config is created but not used in the visible part of
    # this function; the rest of the function may lie outside this excerpt.
    p_config = configparser.ConfigParser()
def __init__(self):
    """
    Load the decision mapping file, grab the event loop and build the
    ConsulCallHandler from the loaded mapping.
    """
    mapping_path = os.path.join(const.CONF_PATH, const.DECISION_MAPPING_FILE)
    self._resource_file = Json(mapping_path).load()
    self._loop = asyncio.get_event_loop()
    self._consul_call = self.ConsulCallHandler(self._resource_file)
class DecisionMonitor:
    """
    Read and acknowledge resource decisions kept in the Decision DB.
    """

    def __init__(self):
        """
        Load the decision mapping file, grab the event loop and build the
        ConsulCallHandler from the loaded mapping.
        """
        mapping_path = os.path.join(const.CONF_PATH,
                                    const.DECISION_MAPPING_FILE)
        self._resource_file = Json(mapping_path).load()
        self._loop = asyncio.get_event_loop()
        self._consul_call = self.ConsulCallHandler(self._resource_file)

    class ConsulCallHandler:
        """
        Run Decision DB calls under a timeout.
        """

        def __init__(self, resource_file):
            """
            Create the Decision DB handle and read the call timeout.

            :param resource_file: Mapping that may carry "request_timeout";
                3.0 is used when the key is absent.
            """
            self._decisiondb = DecisionDB()
            self._consul_timeout = resource_file.get("request_timeout", 3.0)

        async def get(self, **resource_key):
            """
            Fetch the events for the given key, sorted by alert_time in
            descending order, waiting at most the configured timeout.
            """
            pending_lookup = self._decisiondb.get_event_time(
                **resource_key,
                sort_by=SortBy(DecisionModel.alert_time, SortOrder.DESC))
            return await asyncio.wait_for(pending_lookup,
                                          timeout=self._consul_timeout)

        async def delete(self, **resource_key):
            """
            Delete the events for the given key, waiting at most the
            configured timeout.
            """
            pending_removal = self._decisiondb.delete_event(**resource_key)
            await asyncio.wait_for(pending_removal,
                                   timeout=self._consul_timeout)

    def get_resource_status(self, resource: AnyStr):
        """
        Return the action of the first stored event for a resource.

        :param resource: Name of Resource
        :type: str
        :return: Action of the first event returned by the lookup, or
            Action.OK when nothing is stored or the lookup fails.
        """
        Log.debug(f"Received Status Request for resource {resource}")
        known_resources = self._resource_file.get("resources", {})
        resource_key = known_resources.get(resource, {})
        try:
            lookup = self._consul_call.get(**resource_key)
            resource_data = self._loop.run_until_complete(lookup)
        except Exception as e:
            # A failed lookup is reported as OK.
            Log.error(f"{traceback.format_exc()} {e}")
            return Action.OK
        return resource_data[0].action if resource_data else Action.OK

    def get_resource_group_status(self, resource_group):
        """
        Combine the status of every resource in a resource group.

        :param resource_group: Name of Resource Group.
        :return: The failed status as soon as one resource reports
            Action.FAILED; otherwise Action.RESOLVED when any resource is
            resolved; otherwise Action.OK.
        """
        seen_statuses = []
        Log.debug(f"Received Status Request for resource group {resource_group}")
        groups = self._resource_file.get("resource_groups", {})
        for member in groups.get(resource_group, []):
            member_status = self.get_resource_status(member)
            if member_status == Action.FAILED:
                # One failed resource fails the whole group.
                return member_status
            seen_statuses.append(member_status)
        # Nothing failed: resolved wins over ok.
        return Action.RESOLVED if Action.RESOLVED in seen_statuses \
            else Action.OK

    def acknowledge_resource(self, resource, force=False):
        """
        Delete the stored events of a single resource.

        :param resource: Name of Resource.
        :param force: When True, delete even if the resource status is
            Action.FAILED; otherwise a failed resource is left untouched.
        :return:
        """
        Log.debug(f"Received Acknowledge Request for resource {resource}")
        known_resources = self._resource_file.get("resources", {})
        resource_key = known_resources.get(resource, {})
        try:
            if not force and \
                    self.get_resource_status(resource) == Action.FAILED:
                return
            removal = self._consul_call.delete(**resource_key)
            self._loop.run_until_complete(removal)
        except Exception as e:
            Log.error(f"{e}")

    def acknowledge_resource_group(self, resource_group):
        """
        Acknowledge every resource of a resource group (never forced).

        :param resource_group: Name of Resource Group.
        :return:
        """
        Log.debug(f"Received Acknowledge Request for resource group {resource_group}")
        groups = self._resource_file.get("resource_groups", {})
        for member in groups.get(resource_group, []):
            self.acknowledge_resource(member)
class DecisionMaker(object):
    """
    This class is responsible for taking the HA decisions
    such as failover/failback with the help of RuleEngine
    """

    def __init__(self, decisiondb=None):
        """
        Build the rule engine, bind the decision store and load the HA conf.

        :param decisiondb: Store used to persist decisions. When omitted, a
            new DecisionDB is created for this instance. (A DecisionDB()
            default in the signature would be evaluated once, when the class
            is defined, and then shared by every instance.)
        """
        self._rule_engine = RuleEngine(os.path.join(
            const.CORTX_HA_INSTALL_PATH, const.RULES_FILE_PATH))
        self._decision_db = DecisionDB() if decisiondb is None else decisiondb
        self._conf = Json(os.path.join(
            const.CORTX_HA_INSTALL_PATH, const.CONF_FILE_PATH)).load()

    async def _get_data_nw_interface(self, host_id):
        """
        Return the data interface entry configured for host_id, or an empty
        list when no conf is loaded.
        """
        interface = []
        if self._conf:
            interface = self._conf.get(const.NETWORK).get(host_id).get(
                const.DATA_IFACE)
        return interface

    async def _get_mgmt_nw_interface(self, host_id):
        """
        Return the management interface entry configured for host_id, or an
        empty list when no conf is loaded.
        """
        interface = []
        if self._conf:
            interface = self._conf.get(const.NETWORK).get(host_id).get(
                const.MGMT_IFACE)
        return interface

    async def _get_host_id(self, node_id):
        """
        Return the host id mapped to node_id in the conf, or an empty string
        when no conf is loaded.
        """
        host_id = ""
        if self._conf:
            host_id = self._conf.get(const.NODES).get(node_id)
        return host_id

    async def handle_alert(self, alert):
        """
        Accepts alert in the dict format and validates the same
        alert against set of rules with the help of RuleEngine.

        The action returned by the rule engine, if any, is stored. Errors
        are logged and not propagated to the caller.
        """
        try:
            if alert is not None:
                action = self._rule_engine.evaluate_alert(alert)
                if action is not None:
                    await self._store_action(alert, action)
        except Exception as e:
            Log.error(f"Error occured during alert handling. {e}")

    async def _store_action(self, alert, action):
        """
        Further parses the alert to store information such as:
        component: Actual Hw component which has been affected
        component_id: FRU_ID
        entity: enclosure/node
        entity_id: resource id

        Nothing is stored when no key information could be derived. Errors
        are logged and not propagated to the caller.
        """
        try:
            sensor_response = alert.get(const.MESSAGE).get(
                const.SENSOR_RES_TYPE)
            info_dict = await self._set_db_key_info(sensor_response)
            if info_dict:
                await self._decision_db.store_event(
                    info_dict[const.ENTITY],
                    info_dict[const.ENTITY_ID],
                    info_dict[const.COMPONENT],
                    info_dict[const.COMPONENT_ID],
                    info_dict[const.EVENT_TIME],
                    action)
        except Exception as e:
            Log.error(f"Error occured during storing action. {e}")

    async def _set_db_key_info(self, sensor_response):
        """
        This function derives entity, entity_id, component, component_id,
        event_time from the incoming alert.
        These fields are required to create key for storing the decision in db.
        Key format -
        HA/entity/entity_id/component/component_Id/timestamp
        Examples -
        1. HA/Enclosure/0/controller/1/timestamp
        2. HA/Enclosure/0/controller/2/timestamp
        3. HA/Enclosure/0/fan/0/timestamp
        4. HA/Node/1/raid/0/timestamp
        5. HA/Node/0/IEM/motr/timestamp
        6. HA/Node/1/IEM/s3/timestamp

        :return: Dict holding the key fields, or an empty dict for a NIC
            alert whose interface is in neither the data nor the management
            network.
        """
        info_dict = dict()
        info = sensor_response.get(const.INFO)
        resource_type = info.get(const.RESOURCE_TYPE)
        resource_id = info.get(const.RESOURCE_ID)
        node_id = info.get(const.NODE_ID)
        host_id = await self._get_host_id(node_id)

        # 1. Setting event time.
        info_dict[const.EVENT_TIME] = info.get(const.EVENT_TIME)

        # Here resource type can be in 2 forms -
        # 1. enclosure:fru:disk, node:os:disk_space, node:interface:nw:cable
        #    etc
        # 2. enclosure, iem
        # Splitting the resource type will give us the entity and component
        # fields.
        res_list = resource_type.split(':')

        # 2. Setting entity.
        # For IEM alerts we do not get Node/Enclosure in resource type, so we
        # have to hardcode it to node.
        if resource_type == const.IEM:
            component_var = sensor_response.get(const.SPECIFIC_INFO).get(
                const.SPECIFIC_INFO_COMPONENT)
            info_dict[const.ENTITY] = const.NODE
            info_dict[const.COMPONENT] = resource_type
            info_dict[const.COMPONENT_ID] = component_var
        else:
            info_dict[const.ENTITY] = res_list[0]

        # 3. Setting entity_id
        if info_dict[const.ENTITY] == const.NODE:
            info_dict[const.ENTITY_ID] = host_id
        else:
            info_dict[const.ENTITY_ID] = "0"

        # 4. Setting Component.
        # We will check if we have got the component value in resource type.
        if len(res_list) > 1:
            info_dict[const.COMPONENT] = res_list[-1]
        elif info_dict[const.ENTITY] == const.ENCLOSURE:
            # Component is not present in the resource_type field.
            # For storage connectivity we have component = connectivity and
            # component_id = node/host id.
            info_dict[const.COMPONENT] = const.CONNECTIVITY
            info_dict[const.COMPONENT_ID] = host_id

        # 5. Setting component id
        if info_dict[const.COMPONENT] == const.CONTROLLER:
            info_dict[const.COMPONENT_ID] = host_id
        elif resource_type in (const.NIC, const.NIC_CABLE):
            # If resource_type is node:interface:nw, node:interface:nw:cable
            # then we will read the values from config to know whether it is
            # data or management interface.
            # Since BMC interface is also included in NIC alert we do not
            # have to take any action against it. In case we found the
            # interface related to BMC we will ignore it.
            comp_id = await self._get_component_id_for_nic(
                host_id, resource_id)
            if comp_id:
                info_dict[const.COMPONENT_ID] = comp_id
            else:
                info_dict = {}
        elif resource_type not in (const.IEM, const.ENCLOSURE):
            # For IEM the component id was already fetched from specific
            # info's component id field.
            info_dict[const.COMPONENT_ID] = resource_id
        return info_dict

    async def _get_component_id_for_nic(self, host_id, resource_id):
        """
        Classify a network interface as data or management for a host.

        :return: const.DATA or const.MGMT, or an empty string when the
            interface is found in neither network.
        """
        component_id = ""
        # First checking if resource is found in data_nw.
        nw_interface = await self._get_data_nw_interface(host_id)
        if resource_id in nw_interface:
            component_id = const.DATA
        else:
            # Since resource not found in data_nw lets search in mgmt_nw.
            nw_interface = await self._get_mgmt_nw_interface(host_id)
            if resource_id in nw_interface:
                component_id = const.MGMT
        return component_id
def __init__(self, decisiondb=None):
    """
    Build the rule engine, bind the decision store and load the HA conf.

    :param decisiondb: Store used to persist decisions. When omitted, a new
        DecisionDB is created for this instance. (A DecisionDB() default in
        the signature would be evaluated once, when the function is defined,
        and then shared by every instance.)
    """
    self._rule_engine = RuleEngine(os.path.join(
        const.CORTX_HA_INSTALL_PATH, const.RULES_FILE_PATH))
    self._decision_db = DecisionDB() if decisiondb is None else decisiondb
    self._conf = Json(os.path.join(
        const.CORTX_HA_INSTALL_PATH, const.CONF_FILE_PATH)).load()
# For any questions about this software or licensing,
# please email [email protected] or [email protected].

import asyncio
import datetime
import os
import unittest

from cortx.utils.ha.dm.decision_monitor import DecisionMonitor
from cortx.utils.ha.dm.repository.decisiondb import DecisionDB
from cortx.utils.schema.payload import Json

# Test fixture data is read from test_schema/ next to this file.
dir_path = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(dir_path, 'test_schema',
                         'test_decision_monitor_data.json')
TEST_DATA = Json(file_path).load()


def _generate_data():
    """
    Store every entry of TEST_DATA["input"] in the Decision DB.

    Each entry is stamped with an alert_time of "now + index hours", so
    later entries carry later alert times.
    """
    # Local import so this helper does not depend on the file-level imports.
    import inspect

    d = DecisionDB()
    for index, each_input in enumerate(TEST_DATA.get("input", [])):
        each_input["alert_time"] = str(
            datetime.datetime.now() + datetime.timedelta(hours=index))
        outcome = d.store_event(**each_input)
        # store_event is awaited by other callers; if it hands back an
        # awaitable, it has to be driven to completion or nothing is stored.
        # A plain (synchronous) result needs no further handling.
        if inspect.isawaitable(outcome):
            asyncio.get_event_loop().run_until_complete(outcome)


class TestDecisionMonitor(unittest.TestCase):
    # Monitor under test, pointed at the resource mapping from the test data
    # instead of the one its constructor loaded.
    _dm = DecisionMonitor()
    _dm._resource_file = TEST_DATA.get("test_file")
    _loop = asyncio.get_event_loop()
    # Populate the Decision DB once, while the class body is executed.
    _generate_data()
def __init__(self):
    """
    Register the first created instance and initialise Conf.

    Conf is initialised and the message bus configuration is loaded under
    const.CONFIG_INDEX only when no instance has been registered yet; later
    constructions do nothing.
    """
    # Identity check: None is a singleton, and `is` cannot be affected by a
    # custom __eq__.
    if ConfInit.__instance is None:
        ConfInit.__instance = self
        Conf.init()
        Conf.load(const.CONFIG_INDEX, Json(const.MESSAGE_BUS_CONF))