def event(reason, version_info):
    """!
    Drive one step of the watchdog state machine.

    The machine has three states: initial, disconnected and connected.
    As sketched in the original diagram for this function:

      - initial      -> disconnected   on init_connect_timeout
      - initial      -> connected      on wdt_connect
      - disconnected <-> connected     on wdt_trigger
      - disconnected / connected loop back to themselves on resend_event

    The handler registered in STATE_HANDLER_DICT for the current state is
    called with (reason, version_info) and returns the next state. If that
    differs from the state seen on entry, set_state() is called; otherwise
    check_resend_event() is called.

    The process exits through sys.exit() when the mgmtd pid recorded in
    g_mgmtd_pid is no longer reported by Vsp.get_pids('mgmtd'), or when
    the current state has no registered handler.

    @param reason        passed through unchanged to the state handler,
                         set_state() and check_resend_event()
    @param version_info  passed through unchanged to the same callees
    """
    global g_curr_wdt_state

    # Remember the state on entry so a transition can be detected below.
    previous_state = g_curr_wdt_state

    # The watchdog must not outlive the mgmtd instance it was started for.
    live_pids = Vsp.get_pids('mgmtd')
    if live_pids is None or g_mgmtd_pid not in live_pids:
        Logging.log(Logging.LOG_ERR,
                    "Unexpected termination of mgmtd, kill watchdog!")
        sys.exit()

    # Dispatch table in place of a switch statement: a state without a
    # handler is fatal.
    if g_curr_wdt_state not in STATE_HANDLER_DICT:
        Logging.log(Logging.LOG_ERR,
                    "Unknown state: %s, reset to initial" % g_curr_wdt_state)
        sys.exit()

    # The handler decides, from the given input, what the next state is.
    handler = STATE_HANDLER_DICT[g_curr_wdt_state]
    next_state = handler(reason, version_info)

    if previous_state == next_state:
        # Still in the same state; see whether the event must be resent.
        check_resend_event(reason, version_info)
    else:
        set_state(next_state, reason, version_info)
def main():
    """!
    Entry point to the watchdog.

    Initializes the logger, records the pid of mgmtd in the global
    g_mgmtd_pid, opens the Mgmt session, installs terminate_handler for
    SIGINT/SIGTERM/SIGQUIT, removes SESSION_FILE if it exists, and then
    runs monitor_esxi(). Mgmt.close() is called once monitor_esxi()
    returns.

    The process exits through sys.exit() if no single mgmtd pid could be
    determined: either mgmtd is not running, or more than one pid was
    still being reported after all settle retries.
    """
    global g_mgmtd_pid
    g_mgmtd_pid = None

    Logging.log_init('esxi_watchdog', 'esxi_watchdog', 0,
                     Logging.component_id(Logging.LCI_VSP),
                     Logging.LOG_DEBUG, Logging.LOG_LOCAL0,
                     Logging.LCT_SYSLOG)
    Logging.log(Logging.LOG_INFO, "esxi watchdog started")

    # Bug 117274: It may happen that we get multiple pids for the mgmtd
    # process when pidof runs between the fork and exec calls; retry to
    # allow mgmtd to settle.
    for _ in range(1, MAX_MGMTD_SETTLE_RETRY):
        mgmtd_pids = Vsp.get_pids('mgmtd')
        if not mgmtd_pids:
            # No pid reported (None or empty result): mgmtd is not up.
            # Leave g_mgmtd_pid unset so the check below logs and exits,
            # instead of failing on len(None) or an empty-list index.
            break
        if len(mgmtd_pids) == 1:
            g_mgmtd_pid = mgmtd_pids[0]
            break
        # Multiple pids detected, give mgmtd some time to settle.
        time.sleep(MGMTD_SETTLE_TIMEOUT)

    # Bug 112192: monitor the mgmtd pid; if mgmtd crashes/exits, terminate
    # the watchdog as well.
    if g_mgmtd_pid is None:
        # mgmtd is not up, kill the watchdog process.
        Logging.log(Logging.LOG_ERR, "Mgmtd is not ready, kill watchdog!")
        sys.exit()

    Mgmt.open()

    for signum in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT):
        signal.signal(signum, terminate_handler)

    # Invalidate the session file if it exists on startup.
    if os.path.exists(SESSION_FILE):
        os.remove(SESSION_FILE)

    monitor_esxi()
    Mgmt.close()