def check_ps4(ps4):
    """Check that the PS4 is not switched on outside its allowed time slot.

    Each time the console answers the ping, an event is recorded through
    ``create_event``. Being up is accepted only while
    ``is_time_between('21:00:00', '22:30:00')`` holds; otherwise the problem
    is appended to the global report (``g_msg`` / ``g_short_msg``) and
    ``g_atLeastOneProblem`` is set.

    Args:
        ps4: host handed to ``smart_ping`` to reach the console.

    Returns:
        A truthy value when the console is down, or up within the allowed
        slot; a falsy value when it is up at any other time.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    psUp = smart_ping(ps4, g_max_ping_errors)
    if psUp:
        create_event("ps4", "ps4 is up")

    # Being down is always fine; being up is fine only inside the slot.
    # The time window is only evaluated when the console is actually up.
    ps4OK = (not psUp) or is_time_between('21:00:00', '22:30:00')

    if ps4OK:
        logging.info("check_ps4 OK")
    else:
        g_atLeastOneProblem = True
        g_msg = g_msg + myCheck(ps4OK) + \
            "PS4 seems to be up when it shouldn't !\n<p>"
        g_short_msg = "ps4" if g_short_msg == "" else g_short_msg + " - ps4"
    return ps4OK
def check_smart_mustBeUp():
    """Check that every appliance listed in ``params.mustBeUp`` answers the ping.

    ``params.mustBeUp`` is iterated as ``(name, ip)`` pairs. Each ``ip`` is
    probed with ``smart_ping`` using ``g_max_ping_errors``. For every host
    that does not answer, ``g_atLeastOneProblem`` is set and the host name is
    appended to the global report (``g_msg`` / ``g_short_msg``). When all
    hosts answer, a single "OK" line is logged.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    everybody_up = True
    for name, ip in params.mustBeUp:
        isUp = smart_ping(ip, g_max_ping_errors)
        if isUp:
            continue

        # Only unreachable hosts are reported, so the state is always "down".
        g_atLeastOneProblem = True
        everybody_up = False
        g_msg = g_msg + myCheck(isUp) + f"{name} is down" + "\n<p>"
        g_short_msg = name if g_short_msg == "" else g_short_msg + " - " + name

    if everybody_up:
        logging.info("check_smart_mustBeUp OK")
def check_lastWindowDatetime():
    """Check that the last "getwindow" timestamp is recent enough.

    The tolerated age depends on whether mypc3 answers the ping:
    ``shortDelayGetLastWindow`` when it is up, ``longDelayGetLastWindow``
    when it is down. A stale timestamp is appended to the global report
    (``g_msg`` / ``g_short_msg``); ``g_atLeastOneProblem`` is left untouched.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    current = datetime.datetime.now()
    last_run = getLastWindowDatetime()

    # Pick the tolerance according to the state of mypc3.
    if smart_ping(mypc3, g_max_ping_errors):
        tolerance = shortDelayGetLastWindow
    else:
        tolerance = longDelayGetLastWindow

    recent_enough = current <= last_run + tolerance
    if recent_enough:
        return

    g_msg = g_msg + myCheck(recent_enough) + \
        f"lastGetWindowDatetime : {last_run:%Y-%m-%d %H:%M:%S}" + "\n<p>"
    if g_short_msg == "":
        g_short_msg = "getLastWindow"
    else:
        g_short_msg = g_short_msg + " - getLastWindow"
def check_ubuntu_mustBeUp():
    """Check that the Ubuntu host answers the ping whenever mypc3 does.

    The Ubuntu host (192.168.0.52) is only probed when mypc3 is reachable.
    If mypc3 is up and the Ubuntu host is not, the problem is appended to
    the global report and ``g_atLeastOneProblem`` is set; in every other
    case an "OK" line is logged.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    ubuntu_host = "192.168.0.52"

    pc_alive = smart_ping(mypc3, g_max_ping_errors)
    # The second ping is skipped entirely when mypc3 is down.
    ubuntu_alive = smart_ping(ubuntu_host, g_max_ping_errors) if pc_alive else False

    if pc_alive and not ubuntu_alive:
        state = "up" if ubuntu_alive else "down"
        g_atLeastOneProblem = True
        g_msg = g_msg + myCheck(ubuntu_alive) + \
            "%s is %s" % (ubuntu_host, state) + "\n<p>"
        if g_short_msg == "":
            g_short_msg = ubuntu_host
        else:
            g_short_msg = g_short_msg + " - " + ubuntu_host
    else:
        logging.info("check_ubuntu_mustBeUp OK")
def check_pi73_mustBeUp():
    """Check that the host at 192.168.0.73 answers the ping.

    Original author's note: there is a temporary problem with the wlan0 of
    192.168.0.73, temporarily replaced by wlan1 at 192.168.0.83.

    When the host does not answer, the problem is appended to the global
    report and ``g_atLeastOneProblem`` is set; otherwise an "OK" line is
    logged.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    host = "192.168.0.73"
    alive = smart_ping(host, g_max_ping_errors)

    if alive:
        logging.info("check_pi73_mustBeUp OK")
        return

    state = "up" if alive else "down"
    g_atLeastOneProblem = True
    g_msg = g_msg + myCheck(alive) + "%s is %s" % (host, state) + "\n<p>"
    if g_short_msg == "":
        g_short_msg = host
    else:
        g_short_msg = g_short_msg + " - " + host
def check_main_watchdog_up(main_watchdog):
    """Check whether the main watchdog host answers the ping.

    When the host does not answer, ``g_atLeastOneProblem`` is set and the
    problem is appended to the global report (``g_msg`` / ``g_short_msg``).

    Args:
        main_watchdog: host of the main watchdog, handed to ``smart_ping``
            and concatenated into the report (so it is expected to be a str).

    Returns:
        True when the main watchdog is up, False when it is down.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    isUp = smart_ping(main_watchdog, g_max_ping_errors)
    if isUp:
        return True

    # Only the unreachable case is reported, so the state is always "down".
    g_atLeastOneProblem = True
    g_msg = g_msg + myCheck(isUp) + \
        f"main_watchdog ({main_watchdog}) is down" + "\n<p>"
    g_short_msg = main_watchdog if g_short_msg == "" else g_short_msg + " - " + main_watchdog
    return False
def send_email_if_needed():
    """Send the report by email when a problem was found or a recap is due.

    A recap is due when the current hour equals ``systematicEmailSendTimeHH``
    and the current minute lies strictly within the 10 minutes preceding
    ``systematicEmailSendTimeMM``. The closing lines are appended to the
    global ``g_msg`` before it is handed to ``email``.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem

    moment = datetime.datetime.now()
    recap_due = (
        moment.hour == systematicEmailSendTimeHH
        and systematicEmailSendTimeMM - 10 < moment.minute < systematicEmailSendTimeMM
    )

    # mypc3 is pinged on every call, whether or not an email goes out.
    pc_alive = smart_ping(mypc3, g_max_ping_errors)

    if not (g_atLeastOneProblem or recap_due):
        return

    if g_atLeastOneProblem:
        g_msg = g_msg + "😬 😬 at least one problem found; email sent !" + "\n<p>"
    else:
        pc_state = "up" if pc_alive else "down"
        g_msg = g_msg + "everything seems to be OK"
        g_msg = g_msg + "(NB : mypc3 is %s)" % pc_state + "\n<p>"
        g_msg = g_msg + "sending anyway because it's time to recap the situation" + "\n<p>"

    g_msg = g_msg + "\n<p>" + "Don't forget to fix the lastGetWindowDatetime problem !!!"
    email(g_msg)
def main():
    """Run one pass of the watchdog.

    The watchdog checks the various servers on the network and sends email
    notifications when something weird is detected. Nothing is checked
    during the 02:59:59-03:03:00 window, when some servers are rebooting.
    A host that is not the main watchdog only runs the checks itself when
    the main watchdog does not answer the ping.
    """
    global g_msg, g_short_msg, g_atLeastOneProblem, g_params

    utils.init_logger('INFO')
    logging.info(
        "------------------------------------------------------------")
    logging.info("Starting watchdog")
    ver = getVersion()

    # Each check runs only when its time_elapsed(...) gate allows it.
    # Lambdas keep argument lookup lazy, exactly as in an if-chain.
    gated_checks = (
        (15, lambda: check_pool(delay_pool_secs)),
        (60, lambda: check_power(delay_power_secs)),
        (5, lambda: check_frigo(delay_frigo_secs)),
        (10, lambda: check_ps4(ps4)),
        (5, check_smart_mustBeUp),
        (120, check_pi73_mustBeUp),
        (30, check_ubuntu_mustBeUp),
        (60, check_macaddresses),
        (120, lambda: check_backup(delay_backup_secs)),
    )

    # Don't run the watchdog while some servers are rebooting.
    skip_watchdog = is_time_between('02:59:59', '03:03:00')
    if not skip_watchdog:
        main_watchdog = g_params.main_watchdog
        i_am_main = (main_watchdog == myHostname)
        main_is_up = smart_ping(main_watchdog, g_max_ping_errors)

        if i_am_main or not main_is_up:
            if not i_am_main:
                logging.error(
                    f"##### I am not the main watchdog, but the main_watchdog ({main_watchdog}) seems down, so I'll do all the checks"
                )
            for gate, run_check in gated_checks:
                if time_elapsed(gate):
                    run_check()
            # check_lastWindowDatetime() is intentionally not called.
            logging.warning(
                "!!!! check_lastWindowDatetime has been deactivated !!!!!!")
        else:
            # A secondary watchdog has nothing to do while the main one is up.
            logging.info(
                f"I am not the main_watchdog, so I just checked the main_watchdog ({main_watchdog}) is up"
            )
    else:
        logging.info(
            "Skipping this one given we are in a period of server reboots")

    # NOTE(review): this runs on every non-skipped pass, including a secondary
    # watchdog that performed no checks - confirm this is intended.
    if not skip_watchdog:
        send_email_if_needed()

    if g_short_msg != "":
        logging.info(f"!!!!! Problems : {g_short_msg}")
        logging.info(f"!!!!! Details : {g_msg}")
    logging.info("Ending watchdog")
    utils.shutdown_logger()