def check_all(interactive):
    """Run every troubleshooter section in order, stopping at the first error.

    The sections are, in order: installation, connection, heartbeat,
    high CPU / memory, syslog and custom logs. A separator line is printed
    between consecutive sections.

    Args:
        interactive: passed unchanged to every section.

    Returns:
        The result of the first section for which is_error() is true, or the
        result of the last section (custom logs) when no section reports an
        error.
    """
    separator = "================================================================================"
    sections = (
        check_installation,      # 1: Install
        check_connection,        # 2: Connection
        check_heartbeat,         # 3: Heartbeat
        check_high_cpu_memory,
        check_syslog,
        check_custom_logs,
    )

    # NOTE(review): only the most recent section's result is kept, so a
    # non-error status from an earlier section is replaced by whatever the
    # later sections return - confirm this is intended.
    outcome = NO_ERROR
    for position, run_section in enumerate(sections):
        # the separator goes between sections, never before the first one
        if position > 0:
            print(separator)
        outcome = run_section(interactive)
        if is_error(outcome):
            return outcome
    return outcome
def check_syslog(interactive, prev_success=NO_ERROR):
    """Run the syslog section of the troubleshooter.

    Prerequisites are verified first: an OMS version can be found, the Log
    Analytics endpoints check passes, and omsagent is running for the
    configured workspace. When a prerequisite fails, the matching section
    (installation / connection / heartbeat) is run instead and its result is
    returned. Afterwards the service controller, the rsyslog / syslog-ng
    services and the syslog configuration files are checked in that order.

    Args:
        interactive: forwarded to the other troubleshooter sections.
        prev_success: accepted for signature parity with the other sections.
            No return path of this function uses it, because the result is
            always taken from the last check that ran.

    Returns:
        ERR_INFO_MISSING when the workspace ID cannot be looked up, the
        result of the delegated section when a prerequisite fails, the status
        of the first failing check, or print_errors() applied to the
        configuration-file check when nothing failed.
    """
    print("CHECKING FOR SYSLOG ISSUES...")

    # check if installed / connected / running correctly
    print("Checking if omsagent installed and running...")

    # check installation
    if get_oms_version() is None:
        print_errors(ERR_OMS_INSTALL)
        print("Running the installation part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_installation(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check connection
    checked_la_endpts = check_log_analytics_endpts()
    if checked_la_endpts != NO_ERROR:
        print_errors(checked_la_endpts)
        print("Running the connection part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_connection(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check running
    workspace_id = geninfo_lookup('WORKSPACE_ID')
    if workspace_id is None:
        # record which piece of information is missing and where it was expected
        error_info.append(('Workspace ID', OMSADMIN_PATH))
        return ERR_INFO_MISSING

    checked_omsagent_running = check_omsagent_running(workspace_id)
    if checked_omsagent_running != NO_ERROR:
        print_errors(checked_omsagent_running)
        print("Running the general health part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_heartbeat(interactive, prev_success=ERR_FOUND)

    # check for service controller
    print("Checking if machine has a valid service controller...")
    checked_sc = check_service_controller()
    if is_error(checked_sc):
        # NOTE(review): unlike the checks below, this error is returned
        # without going through print_errors() - confirm that is intended.
        return checked_sc
    # called for its output only; the status is superseded by later checks
    print_errors(checked_sc)

    # check rsyslog / syslogng running
    print("Checking if machine has rsyslog or syslog-ng running...")
    checked_services = check_services()
    if is_error(checked_services):
        return print_errors(checked_services)
    print_errors(checked_services)

    # check for syslog.conf and syslog destination file
    print("Checking for syslog configuration files...")
    checked_conf_files = check_conf_files()
    if is_error(checked_conf_files):
        if checked_conf_files not in [ERR_OMS_INSTALL, ERR_FILE_MISSING]:
            return print_errors(checked_conf_files)
        # these two codes are handed over to the installation section
        print_errors(checked_conf_files)
        print("Running the installation part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_installation(interactive, err_codes=False, prev_success=ERR_FOUND)

    return print_errors(checked_conf_files)
def check_custom_logs(interactive, prev_success=NO_ERROR):
    """Run the custom logs section of the troubleshooter.

    In interactive mode the user is first asked whether custom logs are in
    use; answering no skips the section. Otherwise the prerequisites are
    verified (an OMS version can be found, the Log Analytics endpoints check
    passes, omsagent is running for the configured workspace) and then the
    custom log configuration is checked. When a prerequisite fails, the
    matching section (installation / connection / heartbeat) is run instead
    and its result is returned.

    Args:
        interactive: when truthy, ask the user whether custom logs are used;
            also forwarded to the other troubleshooter sections.
        prev_success: status returned unchanged when the user reports that
            custom logs are not in use.

    Returns:
        prev_success when the section is skipped, ERR_INFO_MISSING when the
        workspace ID cannot be looked up, the result of the delegated section
        when a prerequisite fails, or print_errors() applied to the custom
        log configuration check.
    """
    if interactive:
        print(" To check if you are using custom logs, please go to https://ms.portal.azure.com\n"
              " and navigate to your workspace. Once there, please navigate to the 'Advanced\n"
              " settings' blade, and then go to 'Data' > 'Custom Logs'. There you should be\n"
              " able to see any custom logs you may have.\n")
        using_cl = get_input("Are you currently using custom logs? (y/n)",
                             (lambda x: x.lower() in ['y', 'yes', 'n', 'no']),
                             "Please type either 'y'/'yes' or 'n'/'no' to proceed.")

        # The validator above accepts any capitalisation, so compare the
        # answer case-insensitively too; otherwise "N" / "No" would be
        # treated as a yes.
        if using_cl.lower() in ('n', 'no'):
            # not using custom logs
            print("Continuing on with the rest of the troubleshooter...")
            print("================================================================================")
            return prev_success

        # using custom logs
        print("Continuing on with troubleshooter...")
        print("--------------------------------------------------------------------------------")

    print("CHECKING FOR CUSTOM LOG ISSUES...")

    # check if installed / connected / running correctly
    print("Checking if omsagent installed and running...")

    # check installation
    if get_oms_version() is None:
        print_errors(ERR_OMS_INSTALL)
        print("Running the installation part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_installation(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check connection
    checked_la_endpts = check_log_analytics_endpts()
    if checked_la_endpts != NO_ERROR:
        print_errors(checked_la_endpts)
        print("Running the connection part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_connection(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check running
    workspace_id = geninfo_lookup('WORKSPACE_ID')
    if workspace_id is None:
        # record which piece of information is missing and where it was expected
        error_info.append(('Workspace ID', OMSADMIN_PATH))
        return ERR_INFO_MISSING

    checked_omsagent_running = check_omsagent_running(workspace_id)
    if checked_omsagent_running != NO_ERROR:
        print_errors(checked_omsagent_running)
        print("Running the general health part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_heartbeat(interactive, prev_success=ERR_FOUND)

    # check customlog.conf
    print("Checking for custom log configuration files...")
    checked_clconf = check_customlog_conf(interactive)
    # error or not, the outcome reported to the caller is print_errors()'s result
    return print_errors(checked_clconf)
def check_high_cpu_memory(interactive, prev_success=NO_ERROR):
    """Run the high CPU / memory section of the troubleshooter.

    Prerequisites are verified first: an OMS version can be found, the Log
    Analytics endpoints check passes, and omsagent is running for the
    configured workspace. When a prerequisite fails, the matching section
    (installation / connection / heartbeat) is run instead and its result is
    returned. Afterwards log rotation, OMI CPU usage and slab memory /
    dentry cache usage are checked in that order.

    Args:
        interactive: forwarded to the other troubleshooter sections.
        prev_success: accepted for signature parity with the other sections.
            No return path of this function uses it, because the result is
            always taken from the last check that ran.

    Returns:
        ERR_INFO_MISSING when the workspace ID cannot be looked up, the
        result of the delegated section when a prerequisite fails,
        print_errors() applied to the first failing check, or the raw result
        of the slab memory check when nothing failed.
    """
    print("CHECKING FOR HIGH CPU / MEMORY USAGE...")

    # check if installed / connected / running correctly
    print("Checking if omsagent installed and running...")

    # check installation
    if get_oms_version() is None:
        print_errors(ERR_OMS_INSTALL)
        print("Running the installation part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_installation(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check connection
    checked_la_endpts = check_log_analytics_endpts()
    if checked_la_endpts != NO_ERROR:
        print_errors(checked_la_endpts)
        print("Running the connection part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_connection(interactive, err_codes=False, prev_success=ERR_FOUND)

    # check running
    workspace_id = geninfo_lookup('WORKSPACE_ID')
    if workspace_id is None:
        # record which piece of information is missing and where it was expected
        error_info.append(('Workspace ID', OMSADMIN_PATH))
        return ERR_INFO_MISSING

    checked_omsagent_running = check_omsagent_running(workspace_id)
    if checked_omsagent_running != NO_ERROR:
        print_errors(checked_omsagent_running)
        print("Running the general health part of the troubleshooter in order to find the issue...")
        print("================================================================================")
        return check_heartbeat(interactive, prev_success=ERR_FOUND)

    # TODO: decide if should keep this in or not
    # check disk space
    # print("Checking recent modifications to largest files...")
    # checked_disk_space = check_disk_space()
    # if (checked_disk_space != NO_ERROR):
    #     return print_errors(checked_disk_space)

    # check log rotation
    print("Checking if log rotation is working correctly...")
    checked_logrot = check_log_rotation()
    if is_error(checked_logrot):
        return print_errors(checked_logrot)
    # called for its output only; the status is superseded by later checks
    print_errors(checked_logrot)

    # check CPU capacity
    print("Checking if OMI is at 100% CPU (may take some time)...")
    checked_highcpu = check_omi_cpu()
    if is_error(checked_highcpu):
        return print_errors(checked_highcpu)
    print_errors(checked_highcpu)

    # check slab memory / dentry cache issue
    print("Checking slab memory / dentry cache usage...")
    checked_slabmem = check_slab_memory()
    if is_error(checked_slabmem):
        return print_errors(checked_slabmem)

    # NOTE(review): the non-error result is returned as-is rather than
    # through print_errors() like the checks above - confirm this is intended.
    return checked_slabmem