def _get_current_processes(self):
    """Build a fresh process-state table from the node's process list.

    Every entry returned by ``self.process_info_manager.get_all_processes()``
    is turned into a ``ProcessStat`` carrying the reported state name, the
    pid and, when the entry has a ``'start'`` key, the start time (with the
    start counter bumped once).

    Side effect: any group not yet present in ``self.group_names`` is
    appended to it.

    Returns:
        dict: ``{group: {process_name: ProcessStat}}``.
    """
    snapshot = {}
    # Enumerate everything currently on the node so no process is missed.
    proc_list = self.process_info_manager.get_all_processes()
    self.msg_log("DBG: get_current_processes: '%s'" % proc_list,
                 SandeshLevel.SYS_DEBUG)

    for info in proc_list:
        name = self._get_process_name(info)
        pid = int(info['pid'])

        entry = ProcessStat(name, host_ip=self.hostip,
                            hostname=self.hostname)
        entry.process_state = info['statename']
        if 'start' in info:
            # Start bookkeeping is only touched when a start time is known.
            entry.start_time = str(info['start'])
            entry.start_count += 1
        entry.pid = pid

        grp = entry.group
        if grp not in self.group_names:
            self.group_names.append(grp)
        # Create the per-group bucket on first use, then file the entry.
        snapshot.setdefault(grp, {})[name] = entry

    return snapshot
def _send_process_state(self, process_info):
    """Apply a process state-change event to the process-state database.

    The record for the process is looked up in ``self.process_state_db``
    (``{group: {process_name: stat}}``); a new ``ProcessStat`` is created
    when the process is not known yet. Counters and timestamps are then
    updated according to ``process_info['state']``:

    * ``PROCESS_STATE_RUNNING``: start count/time and pid.
    * ``PROCESS_STATE_STOPPED``: stop count/time; clears the
      unexpected-exit flag and the cpu sampling fields.
    * ``PROCESS_STATE_EXITED``: exit count/time; clears the cpu sampling
      fields and, when ``process_info['expected']`` is falsy, logs an
      error and sets the unexpected-exit flag.

    Any other state only updates ``process_state``. For the three states
    above ``self.send_process_state_db([group])`` is called afterwards,
    preceded by ``self._send_init_info(group)`` when the group was not in
    the database before this call.

    Args:
        process_info: mapping with at least ``'state'``; ``'pid'`` is read
            for RUNNING and for the abnormal-exit log line, ``'expected'``
            for EXITED.
    """
    pname = self._get_process_name(process_info)

    # Single pass over the groups. Deliberately no early exit: if the same
    # name is filed under several groups, the last group visited wins.
    proc_stat = None
    for group_stats in self.process_state_db.values():
        if pname in group_stats:
            proc_stat = group_stats[pname]
    if proc_stat is None:
        proc_stat = ProcessStat(pname, host_ip=self.hostip,
                                hostname=self.hostname)

    pstate = process_info['state']
    proc_stat.process_state = pstate

    # Timestamps are microseconds since the epoch, stored as strings.
    send_uve = False
    if pstate == 'PROCESS_STATE_RUNNING':
        proc_stat.start_count += 1
        proc_stat.start_time = str(int(time.time() * 1000000))
        send_uve = True
        proc_stat.pid = int(process_info['pid'])
    elif pstate == 'PROCESS_STATE_STOPPED':
        proc_stat.stop_count += 1
        send_uve = True
        proc_stat.stop_time = str(int(time.time() * 1000000))
        proc_stat.last_exit_unexpected = False
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
    elif pstate == 'PROCESS_STATE_EXITED':
        proc_stat.exit_count += 1
        send_uve = True
        proc_stat.exit_time = str(int(time.time() * 1000000))
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
        if not process_info['expected']:
            self.msg_log(
                '%s with pid: %s exited abnormally' %
                (pname, process_info['pid']),
                SandeshLevel.SYS_ERR)
            proc_stat.last_exit_unexpected = True

    # Update the process state database; a group seen for the first time
    # gets its init info sent before the state itself.
    group = proc_stat.group
    send_init_uve = group not in self.process_state_db
    if send_init_uve:
        self.process_state_db[group] = {}
    self.process_state_db[group][pname] = proc_stat

    if send_uve:
        if send_init_uve:
            self._send_init_info(group)
        self.send_process_state_db([group])
def _send_process_state(self, process_info):
    """Apply a process state-change event to the process-state database.

    The record for the process is looked up in ``self.process_state_db``
    (``{group: {process_name: stat}}``); a new ``ProcessStat`` is created
    when the process is not known yet. Counters and timestamps are then
    updated according to ``process_info['state']``:

    * ``PROCESS_STATE_RUNNING``: start count/time and pid.
    * ``PROCESS_STATE_STOPPED``: stop count/time; clears the
      unexpected-exit flag and the cpu sampling fields.
    * ``PROCESS_STATE_EXITED``: exit count/time; clears the cpu sampling
      fields and, when ``process_info['expected']`` is falsy, logs an
      error and sets the unexpected-exit flag.

    Any other state only updates ``process_state``. For the three states
    above ``self.send_process_state_db([group])`` is called afterwards,
    preceded by ``self._send_init_info(group)`` when the group was not in
    the database before this call.

    Args:
        process_info: mapping with at least ``'state'``; ``'pid'`` is read
            for RUNNING and for the abnormal-exit log line, ``'expected'``
            for EXITED.
    """
    pname = self._get_process_name(process_info)

    # Single pass over the groups. Deliberately no early exit: if the same
    # name is filed under several groups, the last group visited wins.
    proc_stat = None
    for group_stats in self.process_state_db.values():
        if pname in group_stats:
            proc_stat = group_stats[pname]
    if proc_stat is None:
        proc_stat = ProcessStat(pname, host_ip=self.hostip)

    pstate = process_info['state']
    proc_stat.process_state = pstate

    # Timestamps are microseconds since the epoch, stored as strings.
    send_uve = False
    if pstate == 'PROCESS_STATE_RUNNING':
        proc_stat.start_count += 1
        proc_stat.start_time = str(int(time.time() * 1000000))
        send_uve = True
        proc_stat.pid = int(process_info['pid'])
    elif pstate == 'PROCESS_STATE_STOPPED':
        proc_stat.stop_count += 1
        send_uve = True
        proc_stat.stop_time = str(int(time.time() * 1000000))
        proc_stat.last_exit_unexpected = False
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
    elif pstate == 'PROCESS_STATE_EXITED':
        proc_stat.exit_count += 1
        send_uve = True
        proc_stat.exit_time = str(int(time.time() * 1000000))
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
        if not process_info['expected']:
            self.msg_log('%s with pid: %s exited abnormally' %
                         (pname, process_info['pid']),
                         SandeshLevel.SYS_ERR)
            proc_stat.last_exit_unexpected = True

    # Update the process state database; a group seen for the first time
    # gets its init info sent before the state itself.
    group = proc_stat.group
    send_init_uve = group not in self.process_state_db
    if send_init_uve:
        self.process_state_db[group] = {}
    self.process_state_db[group][pname] = proc_stat

    if send_uve:
        if send_init_uve:
            self._send_init_info(group)
        self.send_process_state_db([group])
def _get_current_processes(self):
    """Build a fresh process-state table from the node's process list.

    Every entry returned by ``self.process_info_manager.get_all_processes()``
    is turned into a ``ProcessStat`` carrying the reported state name, the
    pid and, when the entry has a ``'start'`` key, the start time (with the
    start counter bumped once).

    Side effect: any group not yet present in ``self.group_names`` is
    appended to it.

    Returns:
        dict: ``{group: {process_name: ProcessStat}}``.
    """
    snapshot = {}
    # Enumerate everything currently on the node so no process is missed.
    proc_list = self.process_info_manager.get_all_processes()
    self.msg_log("DBG: get_current_processes: '%s'" % proc_list,
                 SandeshLevel.SYS_DEBUG)

    for info in proc_list:
        name = self._get_process_name(info)
        pid = int(info['pid'])

        entry = ProcessStat(name, host_ip=self.hostip)
        entry.process_state = info['statename']
        if 'start' in info:
            # Start bookkeeping is only touched when a start time is known.
            entry.start_time = str(info['start'])
            entry.start_count += 1
        entry.pid = pid

        grp = entry.group
        if grp not in self.group_names:
            self.group_names.append(grp)
        # Create the per-group bucket on first use, then file the entry.
        snapshot.setdefault(grp, {})[name] = entry

    return snapshot
def send_process_state(self, process_info):
    """Apply a process state-change event to the process-state database.

    The record for the process is looked up in ``self.process_state_db``
    (``{group: {process_name: stat}}``); a new ``ProcessStat`` is created
    when the process is not known yet. Counters and timestamps are then
    updated according to ``process_info['state']``:

    * ``PROCESS_STATE_RUNNING``: start count/time and pid.
    * ``PROCESS_STATE_STOPPED``: stop count/time; clears the
      unexpected-exit flag and the cpu sampling fields.
    * ``PROCESS_STATE_EXITED``: exit count/time; clears the cpu sampling
      fields. When ``process_info['expected']`` is falsy the exit is
      logged as abnormal, flagged on the record, and ``find`` is run under
      ``self.get_corefile_path()`` for a core file whose name contains the
      pid. A hit is appended to ``proc_stat.core_file_list`` after
      trimming that list if it already holds ``self.max_cores`` entries.

    Any other state only updates ``process_state``. For the three states
    above ``self.send_process_state_db([group])`` is called afterwards,
    preceded by ``self.send_init_info(group)`` when the group was not in
    the database before this call.

    Args:
        process_info: mapping with at least ``'state'``; ``'pid'`` is read
            for RUNNING and for abnormal exits, ``'expected'`` for EXITED.
    """
    pname = self.get_process_name(process_info)

    # Single pass over the groups. Deliberately no early exit: if the same
    # name is filed under several groups, the last group visited wins.
    proc_stat = None
    for group_stats in self.process_state_db.values():
        if pname in group_stats:
            proc_stat = group_stats[pname]
    if proc_stat is None:
        proc_stat = ProcessStat(pname)

    pstate = process_info['state']
    proc_stat.process_state = pstate

    # Timestamps are microseconds since the epoch, stored as strings.
    send_uve = False
    if pstate == 'PROCESS_STATE_RUNNING':
        proc_stat.start_count += 1
        proc_stat.start_time = str(int(time.time() * 1000000))
        send_uve = True
        proc_stat.pid = int(process_info['pid'])
    elif pstate == 'PROCESS_STATE_STOPPED':
        proc_stat.stop_count += 1
        send_uve = True
        proc_stat.stop_time = str(int(time.time() * 1000000))
        proc_stat.last_exit_unexpected = False
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
    elif pstate == 'PROCESS_STATE_EXITED':
        proc_stat.exit_count += 1
        send_uve = True
        proc_stat.exit_time = str(int(time.time() * 1000000))
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
        if not process_info['expected']:
            self.msg_log(
                '%s with pid: %s exited abnormally' %
                (pname, process_info['pid']),
                SandeshLevel.SYS_ERR)
            proc_stat.last_exit_unexpected = True

            # Check for a core file left behind by this exit. str() keeps
            # the pattern valid whether the pid arrives as int or str.
            core_pattern = ("core.[A-Za-z]*." + str(process_info['pid'])
                            + "*")
            corefile_dir = self.get_corefile_path()
            find_command_option = ("find " + corefile_dir + " -name "
                                   + core_pattern)
            self.msg_log(
                'find command option for cores: %s' %
                (find_command_option), SandeshLevel.SYS_DEBUG)
            # Hand find an argv list instead of splitting the logged
            # string, so a directory containing whitespace stays a single
            # argument.
            (corename, _) = Popen(
                ["find", corefile_dir, "-name", core_pattern],
                stdout=PIPE, close_fds=True).communicate()

            # NOTE(review): everything find printed is stored as ONE list
            # entry, even if several paths matched - confirm intent.
            if corename is not None and len(corename.rstrip()) >= 1:
                self.msg_log('core file: %s' % (corename),
                             SandeshLevel.SYS_ERR)
                # Before adding to the core file list make sure that we
                # do not have too many cores.
                self.msg_log(
                    'core_file_list: %s, max_cores: %d' %
                    (str(proc_stat.core_file_list), self.max_cores),
                    SandeshLevel.SYS_DEBUG)
                if len(proc_stat.core_file_list) >= self.max_cores:
                    # Drop the middle of the list: entries before index
                    # max_old_cores and the trailing max_new_cores - 1
                    # entries are kept, leaving room for the new core.
                    start = self.max_old_cores
                    end = (len(proc_stat.core_file_list)
                           - self.max_new_cores + 1)
                    core_files_to_be_deleted = \
                        proc_stat.core_file_list[start:end]
                    self.remove_corefiles(core_files_to_be_deleted)
                    # Now delete the cores from the list as well.
                    del proc_stat.core_file_list[start:end]
                # Now add the new core to the core file list.
                proc_stat.core_file_list.append(corename.rstrip())
                self.msg_log(
                    '# of cores for %s: %d' %
                    (pname, len(proc_stat.core_file_list)),
                    SandeshLevel.SYS_DEBUG)

    # Update the process state database; a group seen for the first time
    # gets its init info sent before the state itself.
    group = proc_stat.group
    send_init_uve = group not in self.process_state_db
    if send_init_uve:
        self.process_state_db[group] = {}
    self.process_state_db[group][pname] = proc_stat

    if send_uve:
        if send_init_uve:
            self.send_init_info(group)
        self.send_process_state_db([group])
def _send_process_state(self, process_info):
    """Apply a process state-change event to the process-state database.

    The record for the process is looked up in ``self.process_state_db``
    (``{group: {process_name: stat}}``); a new ``ProcessStat`` is created
    when the process is not known yet. Counters and timestamps are then
    updated according to ``process_info['state']``:

    * ``PROCESS_STATE_RUNNING``: start count/time and pid.
    * ``PROCESS_STATE_STOPPED``: stop count/time; clears the
      unexpected-exit flag and the cpu sampling fields.
    * ``PROCESS_STATE_EXITED``: exit count/time; clears the cpu sampling
      fields. When ``process_info['expected']`` is falsy the exit is
      logged as abnormal, flagged on the record, and
      ``self.system_data.find_corefile`` is asked for a core file whose
      name contains the pid. A hit is appended to
      ``proc_stat.core_file_list`` after trimming that list if it already
      holds ``self.max_cores`` entries.

    Any other state only updates ``process_state``. For the three states
    above ``self.send_process_state_db([group])`` is called afterwards,
    preceded by ``self._send_init_info(group)`` when the group was not in
    the database before this call.

    Args:
        process_info: mapping with at least ``'state'``; ``'pid'`` is read
            for RUNNING and for abnormal exits, ``'expected'`` for EXITED.
    """
    pname = self._get_process_name(process_info)

    # Single pass over the groups. Deliberately no early exit: if the same
    # name is filed under several groups, the last group visited wins.
    proc_stat = None
    for group_stats in self.process_state_db.values():
        if pname in group_stats:
            proc_stat = group_stats[pname]
    if proc_stat is None:
        proc_stat = ProcessStat(pname, host_ip=self.hostip)

    pstate = process_info['state']
    proc_stat.process_state = pstate

    # Timestamps are microseconds since the epoch, stored as strings.
    send_uve = False
    if pstate == 'PROCESS_STATE_RUNNING':
        proc_stat.start_count += 1
        proc_stat.start_time = str(int(time.time() * 1000000))
        send_uve = True
        proc_stat.pid = int(process_info['pid'])
    elif pstate == 'PROCESS_STATE_STOPPED':
        proc_stat.stop_count += 1
        send_uve = True
        proc_stat.stop_time = str(int(time.time() * 1000000))
        proc_stat.last_exit_unexpected = False
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
    elif pstate == 'PROCESS_STATE_EXITED':
        proc_stat.exit_count += 1
        send_uve = True
        proc_stat.exit_time = str(int(time.time() * 1000000))
        proc_stat.last_cpu = None
        proc_stat.last_time = 0
        if not process_info['expected']:
            self.msg_log('%s with pid: %s exited abnormally' %
                         (pname, process_info['pid']),
                         SandeshLevel.SYS_ERR)
            proc_stat.last_exit_unexpected = True

            # Check for a core file left behind by this exit. str() keeps
            # the pattern valid whether the pid arrives as int or str.
            corename = self.system_data.find_corefile(
                "core.[A-Za-z]*." + str(process_info['pid']) + "*")

            if corename is not None and len(corename.rstrip()) >= 1:
                self.msg_log('core file: %s' % (corename),
                             SandeshLevel.SYS_ERR)
                # Before adding to the core file list make sure that we
                # do not have too many cores.
                self.msg_log('core_file_list: %s, max_cores: %d' %
                             (str(proc_stat.core_file_list),
                              self.max_cores),
                             SandeshLevel.SYS_DEBUG)
                if len(proc_stat.core_file_list) >= self.max_cores:
                    # Drop the middle of the list: entries before index
                    # max_old_cores and the trailing max_new_cores - 1
                    # entries are kept, leaving room for the new core.
                    start = self.max_old_cores
                    end = (len(proc_stat.core_file_list)
                           - self.max_new_cores + 1)
                    core_files_to_be_deleted = \
                        proc_stat.core_file_list[start:end]
                    self.system_data.remove_corefiles(
                        core_files_to_be_deleted)
                    # Now delete the cores from the list as well.
                    del proc_stat.core_file_list[start:end]
                # Now add the new core to the core file list.
                proc_stat.core_file_list.append(corename.rstrip())
                self.msg_log('# of cores for %s: %d' %
                             (pname, len(proc_stat.core_file_list)),
                             SandeshLevel.SYS_DEBUG)

    # Update the process state database; a group seen for the first time
    # gets its init info sent before the state itself.
    group = proc_stat.group
    send_init_uve = group not in self.process_state_db
    if send_init_uve:
        self.process_state_db[group] = {}
    self.process_state_db[group][pname] = proc_stat

    if send_uve:
        if send_init_uve:
            self._send_init_info(group)
        self.send_process_state_db([group])