def runforever(self, test=False):
    """Mail a notification for each reported process state transition.

    Loops on supervisor events read through ``self.stdin``/``self.stdout``
    (used instead of ``sys.*`` so the loop can be unit tested).  Events
    whose name is not in the table below are acknowledged and skipped.

    When ``test`` is true the loop stops after the first event.
    """
    # eventname -> (label used in the subject, message template).
    # PROCESS_STATE_STARTING and PROCESS_STATE_STOPPING are not listed
    # (they were commented out), so those transitions are not mailed.
    reports = {
        'PROCESS_STATE_UNKNOWN': (
            'UNKNOWN',
            'Process %(processname)s in group %(groupname)s UNKNOWN from state %(from_state)s'),
        'PROCESS_STATE_RUNNING': (
            'RUNNING',
            'Process %(processname)s in group %(groupname)s RUNNING (pid %(pid)s) from state %(from_state)s'),
        'PROCESS_STATE_BACKOFF': (
            'BACKOFF',
            'Process %(processname)s in group %(groupname)s BACKOFF from state %(from_state)s'),
        'PROCESS_STATE_STOPPED': (
            'STOPPED',
            'Process %(processname)s in group %(groupname)s STOPPED from state %(from_state)s'),
        'PROCESS_STATE_EXITED': (
            'EXITED',
            'Process %(processname)s in group %(groupname)s EXITED unexpectedly (pid %(pid)s) from state %(from_state)s'),
        'PROCESS_STATE_FATAL': (
            'FATAL',
            'Process %(processname)s in group %(groupname)s FATAL from state %(from_state)s'),
    }

    while True:
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)
        # The payload is parsed for every event, reported or not.
        pheaders, pdata = childutils.eventdata(payload + '\n')

        report = reports.get(headers['eventname'])
        if report is None:
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write('non-exited event\n')
            self.stderr.flush()
            break

        label, template = report
        msg = template % pheaders
        subject = ' %s %s at %s' % (pheaders['processname'], label,
                                    childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.mail(self.email, subject, msg)

        childutils.listener.ok(self.stdout)
        if test:
            break
def handle_event(payload):
    '''
    Execute the post script when the monitored events happen.

    The event's group name must have the form ``service--cluster--job``;
    any other shape is ignored and None is returned.  The script run is
    ``<app_root>/<service>/<cluster>/<job>/post.sh``; if it does not exist
    a message is written to stderr and None is returned.

    ``app_root`` is read from the ``[rpcinterface:deployment]`` section of
    ``../supervisord.conf`` (relative to this file) and falls back to
    ``DEFAULT_APP_ROOT`` when the file, section or option is missing.
    '''
    pheaders, pdata = childutils.eventdata(payload+'\n')

    name_list = pheaders['groupname'].split('--')
    if len(name_list) != 3:
        return None
    service, cluster, job = name_list

    childutils.pcomm.stderr(childutils.get_asctime()+' Process %(processname)s '
        'in group %(groupname)s exited from state %(from_state)s. '
        'Now execute the post script.\n' % pheaders)

    here = os.path.dirname(__file__)
    supervisor_config_path = '%s/../supervisord.conf' % here
    if not os.path.exists(supervisor_config_path):
        # Not fatal: parser.read() skips missing files and the default
        # application root is used below.
        childutils.pcomm.stderr('Cannot find the config file: supervisord.conf.\n')

    parser = ConfigParser.SafeConfigParser()
    parser.read([supervisor_config_path])

    # Only extend sys.path once; this function runs for every event.
    deployment_dir = '%s/../deployment' % here
    if deployment_dir not in sys.path:
        sys.path.append(deployment_dir)
    from rpcinterface import DEFAULT_APP_ROOT

    # The third positional argument of ConfigParser.get() is ``raw``, not a
    # default, so the fallback has to be implemented explicitly.
    # raw=True keeps the value un-interpolated, which is what the old call
    # effectively did (assuming DEFAULT_APP_ROOT is a non-empty string).
    try:
        app_root = parser.get('rpcinterface:deployment', 'app_root', raw=True)
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        app_root = DEFAULT_APP_ROOT

    service_root = '%s/%s/%s/%s' % (app_root, service, cluster, job)

    if not os.path.exists('%s/post.sh' % service_root):
        childutils.pcomm.stderr('No post.sh for %s found.\n' % service)
        return None

    cmd = ['/bin/bash', '%s/post.sh' % service_root]
    subprocess.call(cmd)
def process_job(self, process, jobs):
    """ Request the start of the process on a suitable address.

    Returns True when a start request was sent (the process is then
    time-stamped and appended to jobs), False otherwise.  On every
    failure path process.ignore_wait_exit is reset to False. """
    # only a stopped process can be started
    if not process.stopped():
        process.ignore_wait_exit = False
        return False
    namespec = process.namespec()
    address = get_address(self.supvisors, self.strategy,
                          process.rules.addresses,
                          process.rules.expected_loading)
    if not address:
        self.logger.warn('no resource available to start {}'.format(
            namespec))
        self.force_process_fatal(namespec, 'no resource available')
        # due to failure, reset ignore_wait_exit flag
        process.ignore_wait_exit = False
        return False
    self.logger.info('try to start {} at address={}'.format(
        namespec, address))
    # the start request is sent through the asynchronous pusher
    self.supvisors.zmq.pusher.send_start_process(address, namespec,
                                                 process.extra_args)
    # timestamp the request and queue the process
    process.request_time = time.time()
    self.logger.debug('{} requested to start at {}'.format(
        namespec, get_asctime(process.request_time)))
    jobs.append(process)
    # extra arguments are consumed by the request
    process.extra_args = ''
    return True
def runforever(self):
    """Mail a notification when a watched process exits or goes FATAL.

    An event is reported when it is PROCESS_STATE_EXITED or
    PROCESS_STATE_FATAL, or when its ``from_state`` is ``EXITED``.
    Processes named in ``self.excluded`` are skipped, as are processes
    outside ``self.programs`` unless ``self.any`` is set.
    """
    while True:
        # self.stdin / self.stdout / self.stderr are used instead of sys.*
        # so that this loop can be unit tested
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)
        pheaders, pdata = childutils.eventdata(payload + '\n')
        event = headers['eventname']
        # keep only the trailing state word, e.g. 'EXITED'
        pheaders['eventname'] = event.split('_')[-1]
        self.stderr.write(str(self.excluded))

        relevant = (event == 'PROCESS_STATE_EXITED'
                    or pheaders['from_state'] == 'EXITED'
                    or event == 'PROCESS_STATE_FATAL')
        if not relevant:
            # nothing to report for this kind of event
            childutils.listener.ok(self.stdout)
            continue

        name = pheaders['processname']
        if name in self.excluded or (not self.any
                                     and name not in self.programs):
            # excluded process, or a process that was not asked for
            childutils.listener.ok(self.stdout)
            continue

        msg = ('Process %(processname)s, in group %(groupname)s, '
               ' moved to %(eventname)s from state %(from_state)s' % pheaders)
        subject = ' %s %s at %s' % (name, pheaders['eventname'],
                                    childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.mail(subject, msg)
        childutils.listener.ok(self.stdout)
def process_job(self, process, jobs):
    """ Try to start the process on an address chosen by get_address.

    Return True if a start request has been sent, False otherwise.
    Whenever False is returned, process.ignore_wait_exit has been reset. """
    started = False
    if process.stopped():
        namespec = process.namespec()
        address = get_address(self.supvisors, self.strategy,
                              process.rules.addresses,
                              process.rules.expected_loading)
        if not address:
            self.logger.warn(
                'no resource available to start {}'.format(namespec))
            self.force_process_fatal(namespec, 'no resource available')
        else:
            self.logger.info('try to start {} at address={}'.format(
                namespec, address))
            # the request is sent through the asynchronous pusher
            self.supvisors.zmq.pusher.send_start_process(
                address, namespec, process.extra_args)
            # timestamp the request and queue the process
            process.request_time = time.time()
            self.logger.debug('{} requested to start at {}'.format(
                namespec, get_asctime(process.request_time)))
            jobs.append(process)
            started = True
            # extra arguments are consumed by the request
            process.extra_args = ''
    if not started:
        # due to failure, reset ignore_wait_exit flag
        process.ignore_wait_exit = False
    return started
def get_process_state_change_msg(self, headers, payload):
    """Return the alert text for an unexpected exit, or None if expected.

    The text is ``[group:process](pid) exited unexpectedly`` followed by
    the time stamp derived from ``self.now``.
    """
    pheaders, pdata = childutils.eventdata(payload + "\n")

    expected_exit = int(pheaders["expected"])
    if expected_exit:
        return None

    description = (
        "[%(groupname)s:%(processname)s](%(pid)s) exited unexpectedly" % pheaders
    )
    stamp = childutils.get_asctime(self.now)
    return "%s %s" % (description, stamp)
def get_process_state_change_msg(self, headers, payload):
    """Return '<time> -- Process group:name (pid N) died unexpectedly'.

    Returns None when the event payload marks the exit as expected.
    """
    pheaders, pdata = childutils.eventdata(payload+'\n')

    exit_was_expected = int(pheaders['expected'])
    if exit_was_expected:
        return None

    description = ('Process %(groupname)s:%(processname)s (pid %(pid)s) died '
                   'unexpectedly' % pheaders)
    stamp = childutils.get_asctime(self.now)
    return '%s -- %s' % (stamp, description)
def get_process_state_change_msg(self, headers, payload):
    """Build the message reported for an unexpected process exit.

    The result is the time stamp for ``self.now``, ``' -- '`` and a
    sentence naming the group, process and pid.  Expected exits yield None.
    """
    pheaders, pdata = childutils.eventdata(payload + '\n')
    if int(pheaders['expected']) != 0:
        return None

    template = ('Process %(groupname)s:%(processname)s (pid %(pid)s) died '
                'unexpectedly')
    txt = template % pheaders
    return '%s -- %s' % (childutils.get_asctime(self.now), txt)
def runforever(self, test=False):
    """Mail a crash report for every unexpected process exit.

    Only PROCESS_STATE_EXITED events whose payload has ``expected`` equal
    to 0 are mailed; everything else is acknowledged and skipped.  When
    SUPERVISOR_SERVER_URL is set in the environment, the last
    ``self.logtail`` lines of the process stdout and stderr (as returned
    by ``self.tail``) are appended to the mail body.

    When ``test`` is true the loop stops after the first event.
    """
    while True:
        # self.stdin, self.stdout and self.stderr are used instead of
        # sys.* so that this code can be unit tested
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        skip_reason = None
        pheaders = None
        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            skip_reason = 'non-exited event\n'
        else:
            pheaders, pdata = childutils.eventdata(payload + '\n')
            if int(pheaders['expected']):
                skip_reason = 'expected exit\n'

        if skip_reason is not None:
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write(skip_reason)
            self.stderr.flush()
            break

        msg = ('Process %(processname)s in group %(groupname)s exited '
               'unexpectedly (pid %(pid)s) from state %(from_state)s' %
               pheaders)

        # The original comment states that supervisor injects
        # SUPERVISOR_SERVER_URL (the address of its RPC server) into
        # supervised processes; the tails are only fetched when it is set.
        if os.environ.get('SUPERVISOR_SERVER_URL'):
            group = pheaders['groupname']
            stdout_tail = self.tail(group, pheaders['processname'],
                                    'stdout', self.logtail)
            stderr_tail = self.tail(group, pheaders['processname'],
                                    'stderr', self.logtail)
            msg = ('%s\n\nLast lines from stdout:\n%s\n\n'
                   'Last lines from stderr:\n%s' % (
                       msg, stdout_tail, stderr_tail))

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.mail(self.email, subject, msg)

        childutils.listener.ok(self.stdout)
        if test:
            break
def get_process_state_change_msg(self, headers, payload):
    """Describe an unexpected exit; return None for an expected one.

    Format: ``[group:process](pid) exited unexpectedly <time>`` where the
    time is rendered from ``self.now``.
    """
    pheaders, pdata = childutils.eventdata(payload + '\n')
    if int(pheaders['expected']) != 0:
        return None

    template = '[%(groupname)s:%(processname)s](%(pid)s) exited unexpectedly'
    txt = template % pheaders
    return '%s %s' % (txt, childutils.get_asctime(self.now))
def get_process_state_change_msg(self, headers, payload):
    """Return the notification line for an unexpected process exit.

    None is returned when the payload flags the exit as expected;
    otherwise the line names group, process and pid and ends with the
    time stamp for ``self.now``.
    """
    pheaders, pdata = childutils.eventdata(payload+'\n')

    was_expected = int(pheaders['expected'])
    if was_expected:
        return None

    what = ('[%(groupname)s:%(processname)s](%(pid)s) exited unexpectedly'
            % pheaders)
    when = childutils.get_asctime(self.now)
    return '%s %s' % (what, when)
def main(cmd, arg):
    """Send a remote communication event through supervisor's RPC interface.

    Writes ``<time> : <cmd> <arg>`` to stderr, then calls
    ``supervisor.sendRemoteCommEvent(cmd, arg + '\\n')`` on the interface
    obtained from ``childutils.getRPCInterface(conf)``.  ``conf`` is a
    module-level name defined outside this function.
    """
    rpcinterface = childutils.getRPCInterface(conf)
    # sys.stderr.write replaces the Python 2 only ``print >>sys.stderr``
    # statement; the emitted text is the same and it works on Python 3 too.
    sys.stderr.write('%s : %s %s\n' % (childutils.get_asctime(), cmd, arg))
    rpcinterface.supervisor.sendRemoteCommEvent(cmd, arg + '\n')
def get_process_state_change_msg(self, headers, payload):
    """Return the alert line for an unexpected exit, or None if expected.

    For unexpected exits ``self.add_customized_mail_list`` is called with
    the parsed payload headers before the line is built.  The line holds
    the time stamp for ``self.now``, an ``http://ip:port`` URL built from
    ``self.local_ip`` and ``self.supervisord_port``, and the description.
    """
    pheaders, pdata = childutils.eventdata(payload + '\n')
    if int(pheaders['expected']) != 0:
        return None

    self.add_customized_mail_list(pheaders)

    description = ('Process %(groupname)s:%(processname)s (pid %(pid)s) died '
                   'unexpectedly' % pheaders)
    fields = (childutils.get_asctime(self.now), self.local_ip,
              self.supervisord_port, description)
    return '%s -- http://%s:%d -- %s' % fields
def get_process_state_change_msg(self, headers, payload):
    """Return "<time> -- Process group:name (pid N) died unexpectedly".

    Returns None when the payload's ``expected`` field is non-zero.
    """
    pheaders, pdata = childutils.eventdata(payload + "\n")

    expected_flag = int(pheaders["expected"])
    if expected_flag:
        return None

    description = (
        "Process %(groupname)s:%(processname)s (pid %(pid)s) died "
        "unexpectedly" % pheaders
    )
    stamp = childutils.get_asctime(self.now)
    return "%s -- %s" % (stamp, description)
def runforever(self, test=False):
    """Listen for supervisor events and mail unexpected process exits.

    Events other than PROCESS_STATE_EXITED, and exits flagged as
    expected, are acknowledged without mailing.  If the environment has
    SUPERVISOR_SERVER_URL set, ``self.tail`` is used to append the last
    ``self.logtail`` lines of stdout and stderr to the message.

    With ``test`` true, the loop ends after handling a single event.
    """
    while True:
        # self.stdin / self.stdout / self.stderr (not sys.*) keep this
        # loop unit-testable
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)

        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            # not an exit event: acknowledge and move on
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write('non-exited event\n')
            self.stderr.flush()
            break

        pheaders, pdata = childutils.eventdata(payload+'\n')

        if int(pheaders['expected']):
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write('expected exit\n')
            self.stderr.flush()
            break

        msg = ('Process %(processname)s in group %(groupname)s exited '
               'unexpectedly (pid %(pid)s) from state %(from_state)s' %
               pheaders)

        # Per the original comment, supervisor injects
        # SUPERVISOR_SERVER_URL (its RPC server address) into supervised
        # processes; log tails are fetched only when it is present.
        if os.environ.get('SUPERVISOR_SERVER_URL'):
            tails = []
            for channel in ('stdout', 'stderr'):
                tails.append(self.tail(pheaders['groupname'],
                                       pheaders['processname'],
                                       channel, self.logtail))
            stdout_tail, stderr_tail = tails
            msg = '%s\n\nLast lines from stdout:\n%s\n\nLast lines from stderr:\n%s' % (msg, stdout_tail, stderr_tail)

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.mail(self.email, subject, msg)

        childutils.listener.ok(self.stdout)
        if test:
            break
def get_process_state_change_msg(self, headers, payload):
    """Return the message reported when a process failed to start.

    The message starts with the time stamp for ``self.now`` and a sentence
    naming the group and process.  When ``self.stderr_lines`` and/or
    ``self.stdout_lines`` are truthy, the text returned by
    ``get_last_lines_of_process_stderr`` / ``..._stdout`` is appended.
    """
    pheaders, pdata = childutils.eventdata(payload + "\n")

    pieces = [
        "Process %(groupname)s:%(processname)s failed to start too many "
        "times\n" % pheaders
    ]
    if self.stderr_lines:
        pieces.append(get_last_lines_of_process_stderr(pheaders, self.stderr_lines))
    if self.stdout_lines:
        pieces.append(get_last_lines_of_process_stdout(pheaders, self.stdout_lines))

    body = "".join(pieces)
    return "%s -- %s" % (childutils.get_asctime(self.now), body)
def get_process_state_change_msg(self, headers, payload):
    """Return the message for an unexpected exit, or None if expected.

    The message is the time stamp for ``self.now`` followed by a sentence
    naming group, process and pid.  When ``self.stderr_lines`` and/or
    ``self.stdout_lines`` are truthy, the text returned by
    ``get_last_lines_of_process_stderr`` / ``..._stdout`` is appended.
    """
    pheaders, pdata = childutils.eventdata(payload+'\n')

    if int(pheaders['expected']) != 0:
        return None

    pieces = ['Process %(groupname)s:%(processname)s (pid %(pid)s) died '
              'unexpectedly\n' % pheaders]
    if self.stderr_lines:
        pieces.append(get_last_lines_of_process_stderr(pheaders,
                                                       self.stderr_lines))
    if self.stdout_lines:
        pieces.append(get_last_lines_of_process_stdout(pheaders,
                                                       self.stdout_lines))

    body = ''.join(pieces)
    return '%s -- %s' % (childutils.get_asctime(self.now), body)
def runforever(self, test=False):
    """Send a mail through /usr/bin/sendemail for every unexpected exit.

    Only PROCESS_STATE_EXITED events whose payload has ``expected`` equal
    to 0 are reported; everything else is acknowledged and skipped.  A
    trace of the command is written to /tmp/tlog (truncated on every
    mail).  When ``test`` is true the loop stops after the first event.
    """
    # Imported locally so the function does not rely on a module-level
    # import that may not exist in this file.
    import subprocess

    while True:
        # self.stdin, self.stdout and self.stderr are used instead of
        # sys.* so that this code can be unit tested
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            # not an exit event: acknowledge and skip
            childutils.listener.ok(self.stdout)
            if test:
                self.stderr.write('non-exited event\n')
                self.stderr.flush()
                break
            continue

        pheaders, pdata = childutils.eventdata(payload + '\n')

        if int(pheaders['expected']):
            childutils.listener.ok(self.stdout)
            if test:
                self.stderr.write('expected exit\n')
                self.stderr.flush()
                break
            continue

        msg = ('Process %(processname)s in group %(groupname)s exited '
               'unexpectedly (pid %(pid)s) from state %(from_state)s' %
               pheaders)

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        # An argument list (no shell) keeps quotes, '$' or backticks in the
        # subject/body from breaking or hijacking the command line.
        # NOTE(review): the SMTP account and password are hard-coded here
        # and end up in /tmp/tlog; they should move to configuration.
        argv = ['/usr/bin/sendemail',
                '-f', '[email protected]',
                '-t', '[email protected]', '[email protected]',
                '-u', subject,
                '-m', msg,
                '-s', 'smtp.gmail.com',
                '-o', 'tls=yes',
                '-xu', '[email protected]',
                '-xp', 'dc45970f']

        # The context manager closes the trace file even if the call fails.
        with open('/tmp/tlog', 'w') as trace:
            trace.write('YO START')
            try:
                subprocess.call(argv)
            except OSError as exc:
                # os.system never raised for a missing binary; stay
                # best-effort and keep the listener alive.
                self.stderr.write('could not run sendemail: %s\n' % exc)
                self.stderr.flush()
            trace.write(' '.join(argv))

        childutils.listener.ok(self.stdout)
        if test:
            break
def runforever(self, test=False):
    """Mail an alert for every unexpected process exit.

    Runs an endless loop so that one listener process handles all events.
    (The original comment notes the alternative: handle one event, exit,
    and let supervisord restart the listener via autorestart.)

    When ``test`` is true, each event's headers and payload are echoed to
    ``self.stderr``.
    """
    while True:
        # wait() announces READY on stdout and blocks until an event
        # arrives; it returns the event header and body.
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        if test:
            self.stderr.write(str(headers) + '\n')
            self.stderr.write(payload + '\n')
            self.stderr.flush()

        # Only PROCESS_STATE_EXITED events are of interest.  pdata is only
        # populated for event types such as PROCESS_LOG_STDERR or
        # PROCESS_COMMUNICATION_STDOUT, so just pheaders is used.
        pheaders = None
        if headers['eventname'] == 'PROCESS_STATE_EXITED':
            pheaders, pdata = childutils.eventdata(payload + '\n')

        # Skip other events and expected exits.  Per the original comment,
        # expected is 1 when the exit code is among the program's
        # configured exitcodes, 0 otherwise.
        if pheaders is None or int(pheaders['expected']):
            # acknowledge with RESULT/OK and wait for the next event
            childutils.listener.ok(self.stdout)
            continue

        hostname = socket.gethostname()
        ip = socket.gethostbyname(hostname)

        # alert body
        message = '\n'.join([
            "Host: %s(%s)" % (hostname, ip),
            "Process: %s" % pheaders['processname'],
            "PID: %s" % pheaders['pid'],
            "EXITED unexpectedly from state: %s" % pheaders['from_state'],
        ])
        # alert subject
        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        # send the alert mail
        self.mail(subject, message)

        # acknowledge with RESULT/OK and go on with the next event
        childutils.listener.ok(self.stdout)
def run(self):
    """Mail on unexpected exits and on stderr output of processes.

    PROCESS_STATE_EXITED events are mailed unless the exit was expected.
    PROCESS_LOG_STDERR events are mailed at most once every 30 seconds
    per process name.  All other events are acknowledged and ignored.
    """
    watched = ('PROCESS_STATE_EXITED', 'PROCESS_LOG_STDERR')
    # process name -> time of the last stderr mail
    last_email = {}

    while True:
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)
        event = headers['eventname']

        if event not in watched:
            childutils.listener.ok(self.stdout)
            continue

        if event == 'PROCESS_STATE_EXITED':
            pheaders, pdata = childutils.eventdata(payload + '\n')
            if not int(pheaders['expected']):
                msg = ('Process %(processname)s in group %(groupname)s exited '
                       'unexpectedly (pid %(pid)s) from state %(from_state)s' %
                       pheaders)
                subject = ' %s crashed at %s' % (pheaders['processname'],
                                                 childutils.get_asctime())
                self.mail(subject, msg)
            childutils.listener.ok(self.stdout)
            continue

        # PROCESS_LOG_STDERR: the payload is parsed as-is here
        pheaders, pdata = childutils.eventdata(payload)
        name = pheaders['processname']
        now = time.time()
        if now - last_email.get(name, 0) >= 30:
            last_email[name] = now
            subject = (
                'Process %(processname)s in group %(groupname)s wrote to stderr'
                % pheaders)
            self.mail(subject, pdata.strip())
        childutils.listener.ok(self.stdout)
def runforever(self, test=False):
    """Send a mail through /usr/bin/sendemail for every unexpected exit.

    Events other than PROCESS_STATE_EXITED and exits flagged as expected
    are acknowledged and skipped.  A trace of the command is written to
    /tmp/tlog (truncated on every mail).  When ``test`` is true the loop
    stops after the first event.
    """
    # Imported locally so the function does not rely on a module-level
    # import that may not exist in this file.
    import subprocess

    while True:
        # self.stdin, self.stdout and self.stderr are used instead of
        # sys.* so that this code can be unit tested
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)

        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            # not an exit event: acknowledge and skip
            childutils.listener.ok(self.stdout)
            if test:
                self.stderr.write('non-exited event\n')
                self.stderr.flush()
                break
            continue

        pheaders, pdata = childutils.eventdata(payload+'\n')

        if int(pheaders['expected']):
            childutils.listener.ok(self.stdout)
            if test:
                self.stderr.write('expected exit\n')
                self.stderr.flush()
                break
            continue

        msg = ('Process %(processname)s in group %(groupname)s exited '
               'unexpectedly (pid %(pid)s) from state %(from_state)s' %
               pheaders)

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        # An argument list (no shell) keeps quotes, '$' or backticks in the
        # subject/body from breaking or hijacking the command line.
        # NOTE(review): the SMTP account and password are hard-coded here
        # and end up in /tmp/tlog; they should move to configuration.
        argv = ['/usr/bin/sendemail',
                '-f', '[email protected]',
                '-t', '[email protected]', '[email protected]',
                '-u', subject,
                '-m', msg,
                '-s', 'smtp.gmail.com',
                '-o', 'tls=yes',
                '-xu', '[email protected]',
                '-xp', 'dc45970f']

        # The context manager closes the trace file even if the call fails.
        with open('/tmp/tlog', 'w') as trace:
            trace.write('YO START')
            try:
                subprocess.call(argv)
            except OSError as exc:
                # os.system never raised for a missing binary; stay
                # best-effort and keep the listener alive.
                self.stderr.write('could not run sendemail: %s\n' % exc)
                self.stderr.flush()
            trace.write(' '.join(argv))

        childutils.listener.ok(self.stdout)
        if test:
            break
def get_process_state_change_msg(self, headers, payload):
    """Return '<time> -- Process group:name is in STATE state', or None.

    STATE is the third ``_``-separated word of the event name (for
    example ``EXITED`` for ``PROCESS_STATE_EXITED``).  None is returned
    for exits flagged as expected and for events of the ``crashmail``
    group.
    """
    pheaders, pdata = childutils.eventdata(payload+'\n')
    pheaders['eventname'] = headers['eventname'].split('_')[2]

    # A missing or non-numeric 'expected' field is treated as "not
    # expected".  Only those two failures are tolerated, so
    # KeyboardInterrupt/SystemExit and genuine bugs are no longer swallowed.
    try:
        expected = int(pheaders['expected'])
    except (KeyError, ValueError):
        expected = 0
    if expected:
        return None

    if pheaders['groupname'] == 'crashmail':
        return None

    txt = ('Process %(groupname)s:%(processname)s is in '
           '%(eventname)s state' % pheaders)
    return '%s -- %s' % (childutils.get_asctime(self.now), txt)
def main(max):
    """Exchange data with the parent over stdout/stdin and log the rate.

    Each iteration sends 'the_data' through ``childutils.pcomm.stdout``,
    reads one line from stdin and appends ``<i> @ <time>`` to /tmp/report.
    When ``max`` is truthy and ``max`` iterations have run, the achieved
    rate is written and the process exits with status 0; otherwise the
    loop never ends.
    """
    start = time.time()
    # The context manager closes the report on every exit path, including
    # the SystemExit raised by sys.exit() below.
    with open('/tmp/report', 'w') as report:
        i = 0
        while 1:
            childutils.pcomm.stdout('the_data')
            # the reply content is not used, only waited for
            sys.stdin.readline()
            report.write(str(i) + ' @ %s\n' % childutils.get_asctime())
            report.flush()
            i += 1
            if max and i >= max:
                end = time.time()
                report.write('%s per second\n' % (i / (end - start)))
                sys.exit(0)
def main(max):
    """Exchange data with the parent over stdout/stdin and log the rate.

    Each iteration sends 'the_data' through ``childutils.pcomm.stdout``,
    reads one line from stdin and appends ``<i> @ <time>`` to /tmp/report.
    When ``max`` is truthy and ``max`` iterations have run, the achieved
    rate is written and the process exits with status 0; otherwise the
    loop never ends.
    """
    start = time.time()
    # The context manager closes the report on every exit path, including
    # the SystemExit raised by sys.exit() below.
    with open('/tmp/report', 'w') as report:
        i = 0
        while 1:
            childutils.pcomm.stdout('the_data')
            # the reply content is not used, only waited for
            sys.stdin.readline()
            report.write(str(i) + ' @ %s\n' % childutils.get_asctime())
            report.flush()
            i += 1
            if max and i >= max:
                end = time.time()
                report.write('%s per second\n' % (i / (end - start)))
                sys.exit(0)
def process_job(self, process, jobs):
    """ Request the stop of the process on every address where it runs.

    Nothing happens if the process is not running.  Otherwise a stop
    request is sent per address, the process is time-stamped and
    appended to jobs. """
    if not process.running():
        return
    # one stop request per address, sent through the asynchronous pusher
    pusher = self.supvisors.zmq.pusher
    for address in process.addresses:
        info_msg = 'stopping process {} on {}'.format(
            process.namespec(), address)
        self.logger.info(info_msg)
        pusher.send_stop_process(address, process.namespec())
    # timestamp the request and queue the process
    process.request_time = time.time()
    debug_msg = '{} requested to stop at {}'.format(
        process.namespec(), get_asctime(process.request_time))
    self.logger.debug(debug_msg)
    jobs.append(process)
def process_job(self, process, jobs):
    """ Stop the process wherever it is running.

    A process that is not running is left untouched.  A running one gets
    a stop request per address, a request time stamp, and is added to
    jobs. """
    if not process.running():
        return
    # stop requests go through the asynchronous pusher
    for target in process.addresses:
        self.logger.info(
            'stopping process {} on {}'.format(process.namespec(), target))
        self.supvisors.zmq.pusher.send_stop_process(
            target, process.namespec())
    # timestamp the request and queue the process
    requested_at = time.time()
    process.request_time = requested_at
    self.logger.debug(
        '{} requested to stop at {}'.format(
            process.namespec(), get_asctime(process.request_time)))
    jobs.append(process)
def runforever(self):
    """Mail an alert for every unexpected exit of a watched program.

    Endless loop: after an event is handled the listener keeps running
    and waits for the next one.  When ``self.programs`` is non-empty only
    groups listed in it are reported; otherwise every group is.
    """
    while True:
        # self.stdin / self.stdout / self.stderr are used instead of sys.*
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)
        self.time = childutils.get_asctime()

        self.write_stderr('[headers] %s' % str(headers))
        self.write_stderr('[payload] %s' % str(payload))

        # Only PROCESS_STATE_EXITED events are handled; pdata is only
        # populated for event types such as PROCESS_LOG_STDERR or
        # PROCESS_COMMUNICATION_STDOUT, so just pheaders is used.
        pheaders = None
        if headers['eventname'] == 'PROCESS_STATE_EXITED':
            pheaders, pdata = childutils.eventdata(payload + '\n')

        # Skip, in this order: other event types, groups outside a
        # non-empty self.programs, and expected exits.  Per the original
        # comment, expected is 1 when the exit code is among the program's
        # configured exitcodes, 0 otherwise.
        if (pheaders is None
                or (len(self.programs) > 0
                    and pheaders['groupname'] not in self.programs)
                or int(pheaders['expected'])):
            # acknowledge with RESULT/OK
            childutils.listener.ok(self.stdout)
            continue

        # host name and IP address of this machine
        hostname = socket.gethostname()
        ip = socket.gethostbyname(hostname)

        # alert body
        msg = '\n'.join([
            "Host: %s(%s)" % (hostname, ip),
            "Process: %s" % pheaders['processname'],
            "PID: %s" % pheaders['pid'],
            "EXITED unexpectedly from state: %s" % pheaders['from_state'],
        ])
        subject = '[Supervistord] %s crashed at %s' % (pheaders['processname'],
                                                       self.time)

        self.write_stderr('[INFO] unexpected exit, mailing')

        # send the mail
        self.send_mail(subject, msg)

        # acknowledge with RESULT/OK and go on with the next event
        childutils.listener.ok(self.stdout)
def runforever(self, test=False):
    """Mail a crash report, with recent log lines, for unexpected exits.

    Events other than PROCESS_STATE_EXITED and exits flagged as expected
    are acknowledged and skipped.  When ``self.stderr_lines`` and/or
    ``self.stdout_lines`` are truthy, the text returned by
    ``get_last_lines_of_process_stderr`` / ``..._stdout`` is appended to
    the mail body.

    With ``test`` true, the loop ends after handling a single event.
    """
    while True:
        # self.stdin / self.stdout / self.stderr (not sys.*) keep this
        # loop unit-testable
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)

        skip_reason = None
        pheaders = None
        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            skip_reason = 'non-exited event\n'
        else:
            pheaders, pdata = childutils.eventdata(payload+'\n')
            if int(pheaders['expected']):
                skip_reason = 'expected exit\n'

        if skip_reason is not None:
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write(skip_reason)
            self.stderr.flush()
            break

        parts = ['Process %(processname)s in group %(groupname)s exited '
                 'unexpectedly (pid %(pid)s) from state %(from_state)s\n\n' %
                 pheaders]
        if self.stderr_lines:
            parts.append(get_last_lines_of_process_stderr(pheaders,
                                                          self.stderr_lines))
        if self.stdout_lines:
            parts.append(get_last_lines_of_process_stdout(pheaders,
                                                          self.stdout_lines))
        msg = ''.join(parts)

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.mail(self.email, subject, msg)

        childutils.listener.ok(self.stdout)
        if test:
            break
def runforever(self):
    """Listen to supervisord events and notify about the targeted ones.

    Events whose name is not in ``self.target_event_name_list`` and
    expected exits are acknowledged and ignored.  For every other event a
    dictionary describing it is passed to ``self.notify``.
    """
    field_names = ('processname', 'groupname', 'pid', 'from_state')

    while True:
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)
        event_name = headers['eventname']

        if event_name not in self.target_event_name_list:
            # not a targeted event: acknowledge and ignore
            childutils.listener.ok(self.stdout)
            continue

        pheaders, pdata = childutils.eventdata(payload + '\n')

        # Per the original comment only PROCESS_STATE_EXITED carries an
        # 'expected' field ('1' expected, '0' unexpected); it is simply
        # absent for other event types.
        is_expected = pheaders.get('expected', None)
        if is_expected and int(is_expected):
            # an expected exit needs no notification
            childutils.listener.ok(self.stdout)
            continue

        self.stderr.write('{} happened, notification\n'.format(event_name))
        self.stderr.flush()

        # Headers differ between event types, so a generic dictionary with
        # None for absent fields is built.
        event = {}
        for field in field_names:
            event[field] = pheaders.get(field, None)
        event['happened_at'] = childutils.get_asctime()
        event['data'] = pdata

        self.notify(event)

        # the event has been handled: acknowledge it
        childutils.listener.ok(self.stdout)
def runforever(self, test=False):
    """Mail a notification for every unexpected process exit.

    Events other than PROCESS_STATE_EXITED and exits flagged as expected
    are acknowledged and skipped.  With ``test`` true, the loop ends
    after handling a single event.
    """
    while True:
        # self.stdin / self.stdout / self.stderr (not sys.*) keep this
        # loop unit-testable
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        skip_reason = None
        pheaders = None
        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            skip_reason = 'non-exited event\n'
        else:
            pheaders, pdata = childutils.eventdata(payload + '\n')
            if int(pheaders['expected']):
                skip_reason = 'expected exit\n'

        if skip_reason is not None:
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write(skip_reason)
            self.stderr.flush()
            break

        msg = ('Process %(processname)s in group %(groupname)s exited '
               'unexpectedly (pid %(pid)s) from state %(from_state)s' %
               pheaders)

        subject = ' %s crashed at %s' % (pheaders['processname'],
                                         childutils.get_asctime())
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.mail(self.email, subject, msg)

        childutils.listener.ok(self.stdout)
        if test:
            break
def runforever(self):
    """Event listener loop that forwards targeted events to self.notify.

    An event is forwarded when its name is in
    ``self.target_event_name_list`` and it is not an expected exit; every
    event is acknowledged either way.
    """
    while True:
        headers, payload = childutils.listener.wait(self.stdin, self.stdout)

        if headers['eventname'] not in self.target_event_name_list:
            # not a targeted event: acknowledge and ignore
            childutils.listener.ok(self.stdout)
            continue

        pheaders, pdata = childutils.eventdata(payload+'\n')

        # Per the original comment only PROCESS_STATE_EXITED carries an
        # 'expected' field ('1' expected, '0' unexpected); other event
        # types do not have it.
        is_expected = pheaders.get('expected', None)
        expected_exit = bool(is_expected) and bool(int(is_expected))
        if expected_exit:
            childutils.listener.ok(self.stdout)
            continue

        self.stderr.write('{} happened, notification\n'.format(headers['eventname']))
        self.stderr.flush()

        # Headers differ between event types; absent fields become None.
        event = dict(
            processname=pheaders.get('processname', None),
            groupname=pheaders.get('groupname', None),
            pid=pheaders.get('pid', None),
            from_state=pheaders.get('from_state', None),
            happened_at=childutils.get_asctime(),
            data=pdata,
        )
        self.notify(event)

        # the event has been handled: acknowledge it
        childutils.listener.ok(self.stdout)
def run_forever(self):
    """Run the scheduled bounce/stop/start actions on every TICK event.

    On each TICK the state of all processes is fetched through
    ``self.rpc``.  For every program in ``self.events``, each event whose
    ``is_ready()`` is true triggers the handler matching its action,
    after a line is written to ``self.stderr``.
    """
    while True:
        # wait() puts the listener in the READY state
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        if not headers['eventname'].startswith('TICK'):
            # only tick events drive the schedule
            childutils.listener.ok(self.stdout)
            continue

        # process name -> state name
        states = {}
        for entry in self.rpc.supervisor.getAllProcessInfo():
            states[entry['name']] = entry['statename']

        # Checked in this order; every matching entry is run.
        handlers = (
            (CronAction.BOUNCE, 'Bouncing', self.bounce_process),
            (CronAction.STOP, 'Stopping', self.stop_process),
            (CronAction.START, 'Starting', self.start_process),
        )

        for prog, events in self.events.items():
            state = states[prog]
            for evt in events:
                if not evt.is_ready():
                    continue
                stamp = childutils.get_asctime()
                for action, verb, handler in handlers:
                    if evt.action == action:
                        self.stderr.write(
                            f'*** {verb} {prog} at {stamp}. ***\n')
                        self.stderr.flush()
                        handler(prog, state)

        childutils.listener.ok(self.stdout)
def handle_event(payload):
    '''
    Execute the post script when the monitored events happen.

    The event's group name must have the form ``service--cluster--job``;
    any other shape is ignored and None is returned.  ``job`` is matched
    against JOB_INSTANCES_REGEX; when the match yields an ``instance_id``
    the script is looked up in
    ``<app_root>/<service>/<cluster>/<job>/<instance_id>/post.sh``,
    otherwise in ``<app_root>/<service>/<cluster>/<job>/post.sh``.  If the
    script does not exist a message is written to stderr and None is
    returned.

    ``app_root`` is read from the ``[rpcinterface:deployment]`` section of
    ``../supervisord.conf`` (relative to this file) and falls back to
    ``DEFAULT_APP_ROOT`` when the file, section or option is missing.
    '''
    pheaders, pdata = childutils.eventdata(payload+'\n')

    name_list = pheaders['groupname'].split('--')
    if len(name_list) != 3:
        return None
    service, cluster, job = name_list

    childutils.pcomm.stderr(childutils.get_asctime()+' Process %(processname)s '
        'in group %(groupname)s exited from state %(from_state)s. '
        'Now execute the post script.\n' % pheaders)

    here = os.path.dirname(__file__)
    supervisor_config_path = '%s/../supervisord.conf' % here
    if not os.path.exists(supervisor_config_path):
        # Not fatal: parser.read() skips missing files and the default
        # application root is used below.
        childutils.pcomm.stderr('Cannot find the config file: supervisord.conf.\n')

    parser = ConfigParser.SafeConfigParser()
    parser.read([supervisor_config_path])

    # Only extend sys.path once; this function runs for every event.
    deployment_dir = '%s/../deployment' % here
    if deployment_dir not in sys.path:
        sys.path.append(deployment_dir)
    from rpcinterface import DEFAULT_APP_ROOT

    # The third positional argument of ConfigParser.get() is ``raw``, not a
    # default, so the fallback has to be implemented explicitly.
    # raw=True keeps the value un-interpolated, which is what the old call
    # effectively did (assuming DEFAULT_APP_ROOT is a non-empty string).
    try:
        app_root = parser.get('rpcinterface:deployment', 'app_root', raw=True)
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        app_root = DEFAULT_APP_ROOT

    # A job name the pattern does not match is used as-is instead of
    # failing with AttributeError on a None match object.
    instance_id = None
    reg_expr = JOB_INSTANCES_REGEX.match(job)
    if reg_expr is not None:
        job = reg_expr.group('job')
        instance_id = reg_expr.group('instance_id')

    if instance_id:
        service_root = '%s/%s/%s/%s/%s' % (app_root, service, cluster, job,
                                           instance_id)
    else:
        service_root = '%s/%s/%s/%s' % (app_root, service, cluster, job)

    if not os.path.exists('%s/post.sh' % service_root):
        childutils.pcomm.stderr('No post.sh for %s found.\n' % service)
        return None

    cmd = ['/bin/bash', '%s/post.sh' % service_root]
    subprocess.call(cmd)
def runforever(self, test=False):
    """Send a DingTalk robot notification for every unexpected exit.

    Endless loop: after an event is handled the listener keeps running
    and waits for the next one.  When ``test`` is true, each event's
    headers and payload are echoed to ``self.stderr``.
    """
    while True:
        # self.stdin / self.stdout / self.stderr are used instead of sys.*
        # so that this loop can be unit tested
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        if test:
            self.stderr.write(str(headers) + '\n')
            self.stderr.write(payload + '\n')
            self.stderr.flush()

        # Only PROCESS_STATE_EXITED events are handled; pdata is only
        # populated for event types such as PROCESS_LOG_STDERR or
        # PROCESS_COMMUNICATION_STDOUT, so just pheaders is used.
        pheaders = None
        if headers['eventname'] == 'PROCESS_STATE_EXITED':
            pheaders, pdata = childutils.eventdata(payload + '\n')

        # Skip other events and expected exits.  Per the original comment,
        # expected is 1 when the exit code is among the program's
        # configured exitcodes, 0 otherwise.
        if pheaders is None or int(pheaders['expected']):
            # acknowledge with RESULT/OK
            childutils.listener.ok(self.stdout)
            continue

        hostname = socket.gethostname()

        # notification body
        details = (hostname, pheaders['processname'], pheaders['pid'],
                   pheaders['from_state'], childutils.get_asctime())
        msg = "# Supervisor 故障通知 \n 检测到进程异常退出,请留意。 \n - 主机名: %s \n - 进程名: %s \n - PID: %s \n - 原状态: %s \n - 时间: %s" % details

        subject = '故障通知:%s' % pheaders['processname']
        if self.optionalheader:
            subject = '[' + self.optionalheader + ']' + subject

        self.stderr.write(
            'Process %s unexpected exit detected, sending notification.\n'
            % pheaders['processname'])
        self.stderr.flush()

        self.dingrobot(self.token, subject, msg)

        # acknowledge with RESULT/OK and go on with the next event
        childutils.listener.ok(self.stdout)
def get_process_state_change_msg(self, headers, payload):
    """Return '<time> -- Process group:name failed to start too many times'.

    The time stamp is rendered from ``self.now``.
    """
    pheaders, pdata = childutils.eventdata(payload+'\n')
    description = ('Process %(groupname)s:%(processname)s failed to start '
                   'too many times' % pheaders)
    stamp = childutils.get_asctime(self.now)
    return '%s -- %s' % (stamp, description)
def test_get_asctime(self):
    # get_asctime is expected to render a timestamp as local time in the
    # form 'YYYY-MM-DD HH:MM:SS,mmm'.
    from supervisor.childutils import get_asctime
    # tm_isdst=-1 lets mktime decide whether DST applies in the local zone.
    local_time = (2009, 1, 18, 22, 14, 7, 0, 0, -1)
    formatted = get_asctime(time.mktime(local_time))
    self.assertEqual(formatted, '2009-01-18 22:14:07,000')
def get_process_state_change_msg(self, headers, payload):
    """Build the message reported when a process failed to start.

    The result is the time stamp for ``self.now``, ``' -- '`` and a
    sentence naming the group and process.
    """
    pheaders, pdata = childutils.eventdata(payload + '\n')
    template = ('Process %(groupname)s:%(processname)s failed to start '
                'too many times')
    txt = template % pheaders
    return '%s -- %s' % (childutils.get_asctime(self.now), txt)
def test_get_asctime(self):
    # get_asctime is expected to render a timestamp as local time in the
    # form 'YYYY-MM-DD HH:MM:SS,mmm'.
    from supervisor.childutils import get_asctime
    # tm_isdst must be -1 (let mktime decide), not 0: forcing "no DST"
    # shifts the result by an hour in zones where this date falls in
    # daylight saving time, which made the assertion fail there.
    timestamp = time.mktime((2009, 1, 18, 22, 14, 7, 0, 0, -1))
    result = get_asctime(timestamp)
    self.assertEqual(result, '2009-01-18 22:14:07,000')
--- supervisor/tests/test_childutils.py +++ supervisor/tests/test_childutils.py @@ -41,7 +41,7 @@ class ChildUtilsTests(unittest.TestCase): def test_get_asctime(self): from supervisor.childutils import get_asctime - timestamp = time.mktime((2009, 1, 18, 22, 14, 7, 0, 0, 0)) + timestamp = time.mktime((2009, 1, 18, 22, 14, 7, 0, 0, -1)) result = get_asctime(timestamp) self.assertEqual(result, '2009-01-18 22:14:07,000')
def runforever(self, test=False):
    """Report every unexpected process exit through the HTTP mail sender.

    Events other than PROCESS_STATE_EXITED and exits flagged as expected
    are acknowledged and skipped.  For the others an ordered set of
    fields (time, environment, host IP, process name, pid, message) is
    passed to ``self.send_mail_by_http`` together with the subject.

    With ``test`` true, the loop ends after handling a single event.
    """
    while True:
        # self.stdin / self.stdout / self.stderr (not sys.*) keep this
        # loop unit-testable
        headers, payload = childutils.listener.wait(
            self.stdin, self.stdout)

        skip_reason = None
        pheaders = None
        if headers['eventname'] != 'PROCESS_STATE_EXITED':
            skip_reason = 'non-exited event\n'
        else:
            pheaders, pdata = childutils.eventdata(payload + '\n')
            if int(pheaders['expected']):
                skip_reason = 'expected exit\n'

        if skip_reason is not None:
            childutils.listener.ok(self.stdout)
            if not test:
                continue
            self.stderr.write(skip_reason)
            self.stderr.flush()
            break

        # time of the event, as seen by this listener
        event_timestamp = childutils.get_asctime()
        # IP address reported by self.get_host_ip()
        host_ip = self.get_host_ip()
        # fields taken from the event payload
        process_name = pheaders['processname']
        process_pid = pheaders['pid']
        group_name = pheaders['groupname']
        from_state = pheaders['from_state']

        msg = 'Process %s in group %s EXITED unexpectedly (pid %s) from state %s' % (
            process_name, group_name, process_pid, from_state)

        # fields of the HTML report, in display order
        html_struct = collections.OrderedDict([
            ('event_time', event_timestamp),
            ('environment', self.envi),
            ('host_ip', host_ip),
            ('process_name', process_name),
            ('process_pid', process_pid),
            ('event_msg', msg),
        ])

        subject = '%s in %s crashed at %s' % (process_name, host_ip,
                                              event_timestamp)
        if self.optionalheader:
            subject = self.optionalheader + ':' + subject

        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()

        self.send_mail_by_http(self.email_to, subject, html_struct)

        childutils.listener.ok(self.stdout)
        if test:
            break