def setUp(self):
    self.event_manager = EventManager.get_instance()
    self.component = SUBSCRIPTION_LIST.TEST
    self.event = SubscribeEvent(RESOURCE_TYPES.NODE, [
        RESOURCE_STATUS.FAILED, RESOURCE_STATUS.ONLINE,
        RESOURCE_STATUS.DEGRADED, RESOURCE_STATUS.OFFLINE
    ])
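# A minimal companion sketch for the fixture above, assuming it lives in
# the same TestCase class. subscribe() is the call used elsewhere in this
# repo and is expected to return a message type; the unsubscribe() call
# and both method bodies are assumptions, not the repo's actual tests.
def test_subscribe(self):
    # Registering the component should yield the message type on which
    # it will receive events for the requested resource states.
    message_type = self.event_manager.subscribe(self.component,
                                                [self.event])
    self.assertIsNotNone(message_type)

def tearDown(self):
    # Undo the registration so tests stay independent (assumed API,
    # mirroring subscribe()).
    self.event_manager.unsubscribe(self.component, [self.event])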
def _create_listener(self) -> EventListener:
    host = self.cns.get_local_nodename()
    group = f'hare_{host}'
    # group_id corresponds to a Kafka consumer group.
    #
    # Here we make sure that different hax instances use different
    # groups. That means that acks issued from one hax instance will not
    # affect the messages that another hax instance receives (so every
    # hax reads the whole history of messages even if the instances
    # process messages at different speeds).
    return EventListener(
        [SubscribeEvent('node', ['offline', 'online', 'failed'])],
        group_id=group)
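# A small illustration of the consumer-group isolation described in the
# comment above: two hax instances on different hosts derive different
# group ids, so Kafka tracks their offsets independently and each reads
# the full message history. Uses only EventListener and SubscribeEvent
# as above; the hostnames and the helper name are illustrative.
def _demo_group_isolation():
    events = [SubscribeEvent('node', ['offline', 'online', 'failed'])]
    # Same subscription, different consumer groups: an ack issued by
    # listener_a does not advance listener_b's offsets.
    listener_a = EventListener(events, group_id='hare_srvnode-1')
    listener_b = EventListener(events, group_id='hare_srvnode-2')
    return listener_a, listener_b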
def main():
    component = "hare"
    resource_type = "node"
    state = "offline"
    # import pudb.remote
    # pudb.remote.set_trace(term_size=(130, 40), port=9998)

    # Before submitting a fake event, we need to register the component
    # (just to make sure that the message will be sent).
    EventManager.get_instance().subscribe(
        component, [SubscribeEvent(resource_type, [state])])
    handler = NodeActionHandler()
    event = HealthEvent("event_id", HEALTH_STATUSES.OFFLINE.value,
                        "severity", "1", "1",
                        "e766bd52-c19c-45b6-9c91-663fd8203c2e",
                        "storage-set-1", "localhost",
                        "srvnode-1.mgmt.public", "node", "16215009572",
                        "iem", "Description")
    handler.publish_event(event)
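# Usage note: subscribe() returns the message type on which the
# component will receive events (the publisher script later in this
# section captures it the same way). A variant of the registration in
# main() that keeps the return value; the helper name is hypothetical.
def register_fake_event_subscriber(component="hare",
                                   resource_type="node",
                                   state="offline"):
    message_type = EventManager.get_instance().subscribe(
        component, [SubscribeEvent(resource_type, [state])])
    print(f"Subscribed {component}, message type is {message_type}")
    return message_type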
def process(self):
    """
    Process config command.
    """
    try:
        # Get log path from cluster.conf.
        log_path = Conf.get(
            self._index,
            f'cortx{_DELIM}common{_DELIM}storage{_DELIM}log')
        machine_id = Conf.machine_id
        ha_log_path = os.path.join(log_path, f'ha/{machine_id}')
        consul_endpoints = Conf.get(
            self._index,
            f'cortx{_DELIM}external{_DELIM}consul{_DELIM}endpoints')
        #=========================================================#
        # Consul service endpoints from cluster.conf              #
        #____________________ cluster.conf _______________________#
        # endpoints:                                              #
        #   - tcp://consul-server.default.svc.cluster.local:8301  #
        #   - http://consul-server.default.svc.cluster.local:8500 #
        #=========================================================#
        # Search for a supported consul endpoint URL in the list of
        # configured consul endpoints (see the standalone sketch after
        # this method).
        filtered_consul_endpoints = list(
            filter(
                lambda x: isinstance(x, str)
                and urlparse(x).scheme == const.consul_scheme,
                consul_endpoints))
        if not filtered_consul_endpoints:
            sys.stderr.write(
                f'Failed to get consul config. '
                f'consul_config: {filtered_consul_endpoints}.\n')
            sys.exit(1)
        # Discussed and confirmed: select the first http endpoint.
        consul_endpoint = filtered_consul_endpoints[0]
        kafka_endpoint = Conf.get(
            self._index,
            f'cortx{_DELIM}external{_DELIM}kafka{_DELIM}endpoints')
        if not kafka_endpoint:
            sys.stderr.write(
                f'Failed to get kafka config. '
                f'kafka_config: {kafka_endpoint}.\n')
            sys.exit(1)
        health_comm_msg_type = (
            FAULT_TOLERANCE_KEYS.MONITOR_HA_MESSAGE_TYPE.value)
        conf_file_dict = {
            'LOG': {
                'path': ha_log_path,
                'level': const.HA_LOG_LEVEL
            },
            'consul_config': {
                'endpoint': consul_endpoint
            },
            'kafka_config': {
                'endpoints': kafka_endpoint
            },
            'event_topic': 'hare',
            'MONITOR': {
                'message_type': health_comm_msg_type,
                'producer_id': 'cluster_monitor'
            },
            'EVENT_MANAGER': {
                'message_type': 'health_events',
                'producer_id': 'system_health',
                'consumer_group': 'health_monitor',
                'consumer_id': '1'
            },
            'FAULT_TOLERANCE': {
                'message_type': health_comm_msg_type,
                'consumer_group': 'event_listener',
                'consumer_id': '1'
            },
            'CLUSTER_STOP_MON': {
                'message_type': 'cluster_stop',
                'consumer_group': 'cluster_mon',
                'consumer_id': '2'
            },
            'CLUSTER': {
                'resource_type': ['node', 'disk', 'cvg', 'cluster']
            },
            'SYSTEM_HEALTH': {
                'num_entity_health_events': 2
            }
        }
        if not os.path.isdir(const.CONFIG_DIR):
            os.mkdir(const.CONFIG_DIR)
        # Open the config file and dump yaml data from conf_file_dict.
        with open(const.HA_CONFIG_FILE, 'w+') as conf_file:
            yaml.dump(conf_file_dict, conf_file, default_flow_style=False)
        Cmd.copy_file(const.SOURCE_HEALTH_HIERARCHY_FILE,
                      const.HEALTH_HIERARCHY_FILE)
        # First populate ha.conf and then do init, because during init
        # this file will be stored in the confstore as key-value pairs.
        ConfigManager.init("ha_setup")
        # Inside cluster.conf, cluster_id is present under
        # "node".<actual POD machine id>."cluster_id". Similarly, the
        # confstore will have this key once the cluster.conf load has
        # taken place.
        # So, to get the cluster_id field from the confstore, we need
        # machine_id.
        self._cluster_id = Conf.get(
            self._index,
            f'node{_DELIM}{machine_id}{_DELIM}cluster_id')
        # site_id = Conf.get(self._index, f'node{_DELIM}{machine_id}{_DELIM}site_id')
        self._site_id = NOT_DEFINED
        # rack_id = Conf.get(self._index, f'node{_DELIM}{machine_id}{_DELIM}rack_id')
        self._rack_id = NOT_DEFINED
        self._storageset_id = NOT_DEFINED
        conf_file_dict.update({
            'COMMON_CONFIG': {
                'cluster_id': self._cluster_id,
                'rack_id': self._rack_id,
                'site_id': self._site_id
            }
        })
        # TODO: Verify whether these newly added config keys are
        # available in the confstore or not.
        with open(const.HA_CONFIG_FILE, 'w+') as conf_file:
            yaml.dump(conf_file_dict, conf_file, default_flow_style=False)
        self._confstore = ConfigManager.get_confstore()
        Log.info(f'Populating the ha config file with '
                 f'consul_endpoint: {consul_endpoint}')
        Log.info('Performing event_manager subscription')
        event_manager = EventManager.get_instance()
        event_manager.subscribe(
            const.EVENT_COMPONENT,
            [SubscribeEvent(const.POD_EVENT, ["online", "failed"])])
        Log.info(f'event_manager subscription for {const.EVENT_COMPONENT} '
                 f'is successful for the event {const.POD_EVENT}')
        event_manager.subscribe(
            const.EVENT_COMPONENT,
            [SubscribeEvent(const.DISK_EVENT, ["online", "failed"])])
        Log.info(f'event_manager subscription for {const.EVENT_COMPONENT} '
                 f'is successful for the event {const.DISK_EVENT}')
        Log.info('Creating cluster cardinality')
        self._confStoreAPI = ConftStoreSearch()
        self._confStoreAPI.set_cluster_cardinality(self._index)
        # Init cluster, site and rack health.
        self._add_cluster_component_health()
        # Init node health.
        self._add_node_health()
        # Init cvg and disk health.
        self._add_cvg_and_disk_health()
        Log.info("config command is successful")
        sys.stdout.write("config command is successful.\n")
    except TypeError as type_err:
        sys.stderr.write(
            f'HA config command failed: Type mismatch: {type_err}.\n')
    except yaml.YAMLError as exc:
        sys.stderr.write(
            f'HA config failed. Invalid yaml configuration: {exc}.\n')
    except OSError as os_err:
        sys.stderr.write(f'HA config failed. OS error: {os_err}.\n')
    except Exception as c_err:
        sys.stderr.write(f'HA config command failed: {c_err}.\n')
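# Standalone sketch of the consul endpoint selection performed in
# process() above, runnable on its own. The endpoint list mirrors the
# cluster.conf comment block; assuming const.consul_scheme is 'http'
# (the code above selects the first http endpoint).
from urllib.parse import urlparse

def pick_consul_endpoint(endpoints, scheme='http'):
    # Keep only string endpoints whose URL scheme matches the supported
    # one, then take the first match, as process() does.
    filtered = [e for e in endpoints
                if isinstance(e, str) and urlparse(e).scheme == scheme]
    return filtered[0] if filtered else None

assert pick_consul_endpoint([
    'tcp://consul-server.default.svc.cluster.local:8301',
    'http://consul-server.default.svc.cluster.local:8500',
]) == 'http://consul-server.default.svc.cluster.local:8500'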
print("MESSAGE: ", message) global MSG MSG = True return CONSUMER_STATUS.SUCCESS_STOP if __name__ == '__main__': try: print("********Event Publisher********") event_manager = EventManager.get_instance() k8s_filter = K8SFilter() component = "hare" resource_type = K8S_ALERT_RESOURCE_TYPE.RESOURCE_TYPE_POD.value state = K8S_ALERT_STATUS.STATUS_FAILED.value message_type = event_manager.subscribe( 'hare', [SubscribeEvent(resource_type, [state])]) print(f"Subscribed {component}, message type is {message_type}") k8s_event = K8SAlert("cortx", "node2", "cortx-data123", K8S_ALERT_STATUS.STATUS_FAILED.value, K8S_ALERT_RESOURCE_TYPE.RESOURCE_TYPE_POD.value, "16215909572") timestamp = str(int(time.time())) event_id = timestamp + str(uuid.uuid4().hex) event_type = k8s_event.status if k8s_filter.filter_event(json.dumps(k8s_event.__dict__)): health_event = HealthEvent(event_id, event_type, EVENT_SEVERITIES.CRITICAL.value, "1", "1", "1", "1", "srvnode_1", "srvnode_1", "pod", "16215909572", "cortx-data-pod", {"namespace": "cortx"})