def _send_heartbeat_loop(self):
    if self._status.get_status() != self._status.RUNNING:
        log.warn('control service will stop. stop sending heartbeat')
        return
    hostinfo = None
    if platforms.is_linux():
        hostinfo = cuphb.LinuxHost(
            str(self._agent_ipport), True,
            self._confdict['control']['interface'])
    elif platforms.is_mac():
        hostinfo = cuphb.MacHost(
            str(self._agent_ipport), True,
            self._confdict['control']['interface'])
    log.info('to create msg and send msg')
    netmsg = msg.CNetMsg(is_postmsg=True)
    netmsg.set_from_addr(self._agent_ipport, (1, 1))
    netmsg.set_to_addr(self._master_ipport, (1, 1))
    netmsg.set_flag(1)
    netmsg.set_msg_type(self._type_man.getnumber_bytype('HEART_BEAT'))
    netmsg.set_uniq_id(1)
    netmsg.set_body(hostinfo.serilize())
    self.post_msg(netmsg)
    log.info('finish queue sending heartbeat to {0}'.format(
        self._master_ipport))
    self._executor.delay_exec(
        int(self._confdict['control']['heartbeat_interval']) - 3,
        self._send_heartbeat_loop,
        urgency=executor.URGENCY_HIGH)
def __exec_worker(self, check_interval, func_queue, worker_name=''):
    while self.__status != 2:
        try:
            item = func_queue.get(timeout=check_interval)
        except queue.Empty:
            # log.debug('no item found in exec queue')
            continue
        try:
            _, (function, argvs, kwargs) = item  # pylint: disable=W0142
            if func_queue is self.__delay_queue:
                log.debug('to delay exec func:{0}'.format(function))
            function(*argvs, **kwargs)
        # pylint: disable=W0703
        # we can NOT predict the exception type
        except Exception as error:
            log.warn(
                '%s worker encountered exception:%s, func:%s, args:%s' %
                (worker_name, error, function, kwargs)
            )
            log.warn('error type:{0}'.format(type(error)))
            log.warn(traceback.format_exc())
    log.info(
        '%s worker thread exited as the service is stopping' % worker_name
    )
def _on_heartbeat(self, netmsg):
    ip_port, _ = netmsg.get_from_addr()
    log.info(
        'receive heartbeat, msg_len:%d, msg_flag:%d, msg_src:%s, '
        'uniqid:%d' % (
            netmsg.get_msg_len(), netmsg.get_flag(),
            str(ip_port), netmsg.get_uniq_id()
        )
    )
    ack_msg = msg.CNetMsg(is_postmsg=True)
    ack_msg.set_from_addr(self._master_ipport, (1, 1))
    ipaddr, stub_future = netmsg.get_from_addr()
    ack_msg.set_to_addr(ipaddr, stub_future)
    ack_msg.set_flag(netmsg.get_flag())
    ack_msg.set_msg_type(self._type_man.getnumber_bytype('ACK_HEART_BEAT'))
    ack_msg.set_uniq_id(netmsg.get_uniq_id() + 1)
    ack_msg.set_body('ACK_HEART_BEAT')
    resource = hb_service.LinuxHost(name=str(self._master_ipport))
    resource.deserilize(netmsg.get_body())
    self._heartbeat_service.refresh(
        '%s:%s' % (ip_port[0], ip_port[1]), resource
    )
    self.post_msg(ack_msg)
    return
def _move2next_load_fname(self):
    """get next load fname"""
    folder = self._get_storage_dir()
    fname = os.path.basename(self._load_stream.name)
    files = self._get_ordered_logfiles(folder)
    length = len(files)
    ind = -1
    try:
        ind = files.index(fname)
    except ValueError:
        log.error('cannot find current log stream:{0}'.format(fname))
        return LOGFILE_BAD_RECORD
    newfile = None
    if ind < (length - 2):
        newfile = '{0}/{1}'.format(folder, files[ind + 1])
    elif ind == (length - 2):
        if files[length - 1].find('writing') < 0:
            newfile = '{0}/{1}'.format(folder, files[length - 1])
        else:
            log.debug('does not have more finished log edits to read')
            return LOGFILE_EOF
    elif ind == (length - 1):
        log.info('does not have more log edits to read, return')
        return LOGFILE_EOF
    try:
        self._load_stream.close()
        self._load_stream = open(newfile, 'rb')
        return LOGFILE_GOOD
    except StandardError as err:
        log.error('failed to move to next load stream:{0}'.format(newfile))
        log.error('err:{0}'.format(err))
        return LOGFILE_BAD_RECORD
def run(self):
    """run the msgcenter"""
    if not self.setup():
        return False
    thd_conn_man = threading.Thread(target=self._run_conn_manager, args=())
    thd_conn_man.start()
    thd_stat = threading.Thread(target=self.dump_stat, args=())
    thd_stat.start()
    # if self._check_flag == CHECK_ON:
    #     cup.log.info('start run check msg transfer thread.')
    #     self._run_conn_msg_check_loop()
    ind = 0
    msg_ackflag = async_msg.MSG_FLAG2NUM['FLAG_ACK']
    while not self._stop:
        msg = self._conn_mgr.get_recv_msg()
        if ind >= 10000:
            recv_queue = self._conn_mgr.get_recv_queue()
            cup.log.info('recv queue size:{0}'.format(recv_queue.qsize()))
            ind = 0
        if msg is not None:
            log.info(
                'msg received, type:%d, flag:%d, from:%s, uniqid:%d' %
                (msg.get_msg_type(), msg.get_flag(),
                 str(msg.get_from_addr()), msg.get_uniq_id()))
            ind += 1
            if msg_ackflag & msg.get_flag() == msg_ackflag:
                self._conn_mgr.push_msg2needack_queue(msg)
            # else:
            self.handle(msg)
        msg = None
    return True
def purge_data(self, before_logid):
    """
    purge log files whose log ids are all less than before_logid
    """
    folder = self._get_storage_dir()
    logfiles = self._get_ordered_logfiles(folder)
    last_logid = None
    last_fname = None
    purge_list = []
    for fname in logfiles:
        if fname.find('writing') >= 0:
            continue
        current = int(fname.split('.')[-1])
        if last_logid is not None and (current - 1) < before_logid:
            purge_list.append(last_fname)
        last_fname = fname
        last_logid = current
    log.info('log id < before_logid will be purged:purged:{0}'.format(
        purge_list))
    ind = 0
    for fname in purge_list:
        full = '{0}/{1}'.format(folder, fname)
        log.info('to purge log file:{0}'.format(full))
        try:
            os.remove(full)
            ind += 1
        except StandardError as err:
            log.error(
                'failed to purge log file:{0}, {1}'.format(full, err)
            )
        if ind % 1000 == 0:
            # throttle: pause briefly after every 1000 removals
            time.sleep(0.1)
def _target(argcontent, proc_cond):
    argcontent.tempscript = tempfile.NamedTemporaryFile(
        dir=self._tmpdir, prefix=self._tmpprefix, delete=True)
    with open(argcontent.tempscript.name, 'w+b') as fhandle:
        fhandle.write('cd {0};\n'.format(os.getcwd()))
        fhandle.write(argcontent.cmd)
    shexe = self.which('sh')
    cmds = [shexe, argcontent.tempscript.name]
    log.info('to async execute {0} with script {1}'.format(
        argcontent.cmd, cmds))
    try:
        proc_cond.acquire()
        argcontent.subproc = subprocess.Popen(
            cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            preexec_fn=_signal_handle)
        proc_cond.notify()
        proc_cond.release()
    except OSError:
        proc_cond.notify()
        proc_cond.release()
        argcontent.ret['returncode'] = -1
        argcontent.ret['stderr'] = (
            'failed to execute the cmd, plz check it out')
def add_log(self, log_type, log_mode, log_binary):
    """add log into the local file"""
    if not self.is_stream_open():
        fname = self.get_next_logfile(self._logid)
        if not self._stream_open(fname):
            return False
    # binary record layout:
    # 32bit len | 128bit logid | 16bit log_type | 16bit log_mode | binary
    bin_logid = self.asign_uint2byte_bybits(self._logid, 128)
    bin_type = self.asign_uint2byte_bybits(log_type, 16)
    bin_mode = self.asign_uint2byte_bybits(log_mode, 16)
    data = '{0}{1}{2}{3}'.format(bin_logid, bin_type, bin_mode, log_binary)
    data_len = len(data)
    str_data_len = self.asign_uint2byte_bybits(data_len, 32)
    log.debug('{0} add_log, log_type {1} log_mode {2}'.format(
        self.__class__, log_type, log_mode))
    write_data = '{0}{1}'.format(str_data_len, data)
    log.info('to add data, logid:{0}'.format(self._logid))
    if self._write_data(write_data):
        log.debug('add_log, write success')
        self._current_filesize += (data_len + 4)
        if not self._check_need_new_logfile():
            return False
        return True
    else:
        log.warn('{0} failed to add_log, log_type {1} log_mode {2}'.format(
            self.__class__, log_type, log_mode))
        return False
def __init__(self):
    super(self.__class__, self).__init__(
        './test.log', log.DEBUG
    )
    log.info('Start to run ' + str(__file__))
    self._executor = executor.ExecutionService()
def run(self):
    """run the msgcenter"""
    if not self.setup():
        return False
    thd_conn_man = threading.Thread(target=self._run_conn_manager, args=())
    thd_conn_man.start()
    thd_stat = threading.Thread(target=self.dump_stat, args=())
    thd_stat.start()
    ind = 0
    msg_ackflag = async_msg.MSG_FLAG2NUM['FLAG_ACK']
    while not self._stop:
        msg = self._conn_mgr.get_recv_msg()
        if ind >= 10000:
            recv_queue = self._conn_mgr.get_recv_queue()
            log.info('msgcenter netmsg queue size:{0}'.format(
                recv_queue.qsize()))
            ind = 0
        if msg is not None:
            try:
                log.info(
                    'msg received, type:%d, flag:%d, from:%s, uniqid:%d' %
                    (msg.get_msg_type(), msg.get_flag(),
                     str(msg.get_from_addr()), msg.get_uniq_id()))
                ind += 1
                if msg_ackflag & msg.get_flag() == msg_ackflag:
                    self._conn_mgr.push_msg2needack_queue(msg)
                self.handle(msg)
            # pylint: disable=W0703
            except Exception as err:
                log.error('got a msg that cannot be handled. '
                          'Seems a network err:{0}'.format(err))
        msg = None
    return True
def add_write_job(self, context):
    """add network write into queue"""
    if context is None:
        return
    try:
        peerinfo = context.get_peerinfo()
    # pylint: disable=W0703
    except Exception as error:
        log.info('failed to get peerinfo, return')
        return
    if not context.try_writelock():
        log.debug(
            'Another thread is writing the context, return. '
            'Peerinfo:%s:%s' % (peerinfo[0], peerinfo[1]))
        return
    if context.is_detroying():
        log.info(
            'The context is being destroyed, i will do nothing. '
            'Peerinfo:%s:%s' % (peerinfo[0], peerinfo[1]))
        return
    try:
        # log.debug('write in add_write_job')
        self._do_write(context)
        self._finish_write_callback(True, context)
    # pylint: disable=W0703
    except Exception as error:
        log.debug(
            'seems error happend for context:%s Peerinfo:%s:%s\n, %s' %
            (str(error), peerinfo[0], peerinfo[1], traceback.format_exc()))
        self._finish_write_callback(False, context)
def login(self, username, passwords):
    """
    if the smtp server needs login, please call this method
    before you call sendmail
    """
    log.info('smtp server will login with user {0}'.format(username))
    self._login_params = (username, passwords)
def exec_worker(self, check_interval, func_queue, worker_name=''):
    log.info('CronExecution exec worker started')
    while self._status != 2:
        try:
            item = func_queue.get(timeout=check_interval)
        except queue.Empty:
            continue
        function = None
        argvs = None
        kwargs = None
        try:
            _, crontask, (function, argvs, kwargs) = item  # pylint: disable=W0142
            if func_queue is self._delay_queue:
                log.debug('to delay exec func:{0}'.format(function))
            dtnow = datetime.datetime.now(crontask.pytz_timezone())
            if (dtnow - crontask.get_last_schedtime()).total_seconds() > 60:
                log.warn(
                    'lagging crontask found (name:{0} id: {1})'.format(
                        crontask.name(), crontask.taskid()))
            function(*argvs, **kwargs)
            self.schedule(crontask)
        # pylint: disable=W0703
        # we can NOT predict the exception type
        except Exception as error:
            log.warn('{0} worker encountered exception:{1}, func:{2}, '
                     'args:{3} {4}, executor service({5})'.format(
                         worker_name, error, function, argvs, kwargs,
                         self._name))
            log.warn('error type:{0}'.format(type(error)))
    log.debug('{0} worker thread exited as the service '
              'is stopping'.format(worker_name))
def __init__(self, config):
    """
    :param config:
        {
            "uri": "ftp://host:port",
            "user": "******",
            "password": "******",
            "extra": None    # timeout, 30 seconds by default
        }
    :raise:
        cup.err.ConfigError if there's any config item missing
    """
    ObjectInterface.__init__(self, config)
    required_keys = ['uri', 'user', 'password']
    if not self._validate_config(self._config, required_keys):
        raise err.ConfigError(str(required_keys))
    self._uri = self._config['uri']
    self._user = self._config['user']
    self._passwd = self._config['password']
    self._extra = self._config['extra']
    self._dufault_timeout = 30
    if self._extra is not None and isinstance(self._config['extra'], int):
        self._dufault_timeout = self._extra
    log.info('to connect to ftp server')
    self._ftp_con = ftplib.FTP()
    self._host = self._uri.split(':')[1][2:]
    self._port = ftplib.FTP_PORT
    if len(self._uri.split(':')[2]) > 0:
        self._port = int(self._uri.split(':')[2])
    self._ftp_con.connect(self._host, self._port, self._dufault_timeout)
    self._ftp_con.login(self._user, self._passwd)
def purge_data(self, before_logid):
    """
    purge log files whose log ids are all less than before_logid
    """
    folder = self._get_storage_dir()
    logfiles = self._get_ordered_logfiles(folder)
    last_logid = None
    last_fname = None
    purge_list = []
    for fname in logfiles:
        if fname.find('writing') >= 0:
            continue
        current = int(fname.split('.')[-1])
        if last_logid is not None and (current - 1) < before_logid:
            purge_list.append(last_fname)
        last_fname = fname
        last_logid = current
    log.info('log id < before_logid will be purged:purged:{0}'.format(
        purge_list))
    ind = 0
    for fname in purge_list:
        full = '{0}/{1}'.format(folder, fname)
        log.info('to purge log file:{0}'.format(full))
        try:
            os.remove(full)
            ind += 1
        except StandardError as err:
            log.error('failed to purge log file:{0}, {1}'.format(
                full, err))
        if ind % 1000 == 0:
            # throttle: pause briefly after every 1000 removals
            time.sleep(0.1)
def __init__(self, config):
    """
    :param config:
        should be compliant with cup.util.conf.Configure2Dict().get_dict(),
        i.e. a dict-like object
    :raise:
        cup.err.ConfigError if there's any config item missing
    """
    ObjectInterface.__init__(self, config)
    required_keys = ['ak', 'sk', 'endpoint', 'bucket']
    if not self._validate_config(config, required_keys):
        raise err.ConfigError(str(required_keys))
    self._config = config
    self._ak = self._config['ak']
    self._sk = self._config['sk']
    self._endpoint = self._config['endpoint']
    self._bucket = self._config['bucket']
    import boto3
    from botocore import exceptions
    from botocore import client as coreclient
    self._s3_config = coreclient.Config(
        signature_version='s3v4',
        s3={'addressing_style': 'path'})
    logging.getLogger('boto3').setLevel(logging.INFO)
    logging.getLogger('botocore').setLevel(logging.INFO)
    logging.getLogger('s3transfer').setLevel(logging.INFO)
    log.info('to connect to boto3')
    self.__s3conn = boto3.client(
        's3',
        aws_access_key_id=self._ak,
        aws_secret_access_key=self._sk,
        endpoint_url=self._endpoint,
        # region_name=conf_dict['region_name'],
        config=self._s3_config)
    self._exception = exceptions.ClientError
def add_log(self, log_type, log_mode, log_binary):
    """add log into the local file"""
    if not self.is_stream_open():
        fname = self.get_next_logfile(self._logid)
        if not self._stream_open(fname):
            return False
    # binary record layout:
    # 32bit len | 128bit logid | 16bit log_type | 16bit log_mode | binary
    bin_logid = self.asign_uint2byte_bybits(self._logid, 128)
    bin_type = self.asign_uint2byte_bybits(log_type, 16)
    bin_mode = self.asign_uint2byte_bybits(log_mode, 16)
    data = '{0}{1}{2}{3}'.format(bin_logid, bin_type, bin_mode, log_binary)
    data_len = len(data)
    str_data_len = self.asign_uint2byte_bybits(data_len, 32)
    log.debug('{0} add_log, log_type {1} log_mode {2}'.format(
        self.__class__, log_type, log_mode))
    write_data = '{0}{1}'.format(str_data_len, data)
    log.info('to add data, logid:{0}'.format(self._logid))
    if self._write_data(write_data):
        log.debug('add_log, write success')
        self._current_filesize += (data_len + 4)
        if not self._check_need_new_logfile():
            return False
        return True
    else:
        log.warn('{0} failed to add_log, log_type {1} log_mode {2}'.format(
            self.__class__, log_type, log_mode))
        return False
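For reference, the record layout noted in the comment above can be parsed back roughly as follows. This is only a sketch: it assumes asign_uint2byte_bybits emits the integer least-significant byte first and that records are read from a Python 2 binary stream; the real byte order should be checked against the serializer before relying on it.

def _bytes2uint(buf):
    """illustrative helper: interpret buf as a little-endian unsigned int"""
    value = 0
    for ind, ch in enumerate(buf):
        value += ord(ch) << (8 * ind)
    return value


def parse_one_record(stream):
    """read one record: 32bit len | 128bit logid | 16bit type | 16bit mode | payload"""
    raw_len = stream.read(4)
    if len(raw_len) < 4:
        return None                     # clean end of file
    data_len = _bytes2uint(raw_len)
    data = stream.read(data_len)
    logid = _bytes2uint(data[0:16])
    log_type = _bytes2uint(data[16:18])
    log_mode = _bytes2uint(data[18:20])
    payload = data[20:]
    return logid, log_type, log_mode, payload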
def connect(self, peer):
    """
    :param peer:
        a (ip, port) tuple
    """
    log.info('to connect to peer:{0}'.format(peer))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self._set_sock_params(sock)
    try:
        ret = sock.connect_ex(peer)
        if ret != 0:
            log.warn('connect failed, peer:{0}'.format(peer))
            return None
        if sock.getpeername() == sock.getsockname():
            log.warn('connect failed, seems connected to self')
            sock.close()
            return None
        self._set_sock_nonblocking(sock)
        return sock
    except socket.error as error:
        log.warn('failed to connect to %s:%s. Error:%s' %
                 (peer[0], peer[1], str(error)))
        sock.close()
        return None
    else:
        sock.close()
        return None
def schedule(self, crontask):
    """schedule a crontask.

    :param crontask:
        the task to be scheduled. Its timer dict looks like {
            'minute': minute_list,
            'hour': hour_list,
            'weekday': weekday_list,
            'monthday': monthday_list,
            'month': month_list
        } and it carries the function, args and kwargs to be executed
    """
    next_schedtime = crontask.next_schedtime()
    if next_schedtime is None:
        log.warn('CronExecution: crontask {0} will be deleted '
                 'from the crontask list as '
                 'no valid schedule time is found'.format(crontask))
        return
    function, args, kwargs = crontask.get_funcargs()
    tmpnow = crontask.pytz_timezone().localize(datetime.datetime.now())
    wait_seconds = (next_schedtime - tmpnow).total_seconds()
    log.info('CronExecution: next schedule time for this crontask is {0} '
             'timezone {1}, wait for {2} seconds, timenow is {3}'.format(
                 next_schedtime, next_schedtime.tzinfo, wait_seconds,
                 next_schedtime.tzinfo.localize(datetime.datetime.now())))
    self.delay_exec(wait_seconds, crontask, function, *args, **kwargs)
    self._task_dict[crontask.taskid()] = crontask
def get(self, path, localpath):
    """get a file into localpath"""
    ret = {
        'returncode': 0,
        'msg': 'success'
    }
    log.info('to get ftp file {0} to {1}'.format(path, localpath))
    self._check_timeout()
    cwd = self._ftp_con.pwd()
    path = self._get_relative_path(path, cwd)
    if localpath.endswith('/'):
        localpath += path.split('/')[-1]
    log.info('to get ftp {0} to local {1}'.format(path, localpath))
    try:
        with open(localpath, 'w+') as fhandle:
            ftp_cmd = 'RETR {0}'.format(path)
            self._ftp_con.retrbinary(ftp_cmd, fhandle.write)
    except Exception as error:
        ret['returncode'] = -1
        ret['msg'] = 'failed to get {0} to {1}, err:{2}'.format(
            path, localpath, error
        )
        log.error(ret['msg'])
    return ret
def global_sock_keepalive(self,
                          after_idle_sec=1, interval_sec=3, max_fails=5):
    """
    Set TCP keepalive on an open socket.

    It activates after 1 second (after_idle_sec) of idleness,
    then sends a keepalive ping once every 3 seconds (interval_sec),
    and closes the connection after 5 failed pings (max_fails),
    i.e. after roughly 15 seconds in total.

    Notice, this will set all sockets this way.

    :param sock:
        socket
    :param after_idle_sec:
        for TCP_KEEPIDLE. May not work, depends on your system
    :param interval_sec:
        for TCP_KEEPINTVL
    :param max_fails:
        for TCP_KEEPCNT
    """
    before = copy.deepcopy(self.SOCK_ALIVE_PARAMS)
    self.SOCK_ALIVE_PARAMS = {
        'after_idle_sec': after_idle_sec,
        'interval_sec': interval_sec,
        'max_fails': max_fails
    }
    log.info(
        'to set global socket keepalive params from {0} to {1}'.format(
            before, self.SOCK_ALIVE_PARAMS))
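The method above only records the keepalive parameters; presumably they are applied to each socket elsewhere (e.g. in _set_sock_params). For reference, a minimal sketch of how such parameters are typically applied on Linux with the standard socket options; the helper name is made up for illustration.

import socket


def apply_keepalive(sock, after_idle_sec=1, interval_sec=3, max_fails=5):
    """illustrative only: enable TCP keepalive on a Linux socket"""
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
    # Linux-specific knobs; not available on every platform
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, after_idle_sec)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval_sec)
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, max_fails)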
def connect(self, peer):
    """
    :param peer:
        a (ip, port) tuple
    """
    log.info('to connect to peer:{0}'.format(peer))
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self._set_sock_params(sock)
    try:
        ret = sock.connect_ex(peer)
        if ret != 0:
            log.warn('connect failed, peer:{0}'.format(peer))
            return None
        if sock.getpeername() == sock.getsockname():
            log.warn('connect failed, seems connected to self')
            sock.close()
            return None
        self._set_sock_nonblocking(sock)
        log.info('connect peer success')
        return sock
    except socket.error as error:
        log.warn(
            'failed to connect to %s:%s. Error:%s' %
            (peer[0], peer[1], str(error))
        )
        sock.close()
        return None
    else:
        sock.close()
        return None
def check_and_load_existence(user_confdict, default_dict, key, required=False):
    """
    check if the conf item is required to be existent.
    Use the default if it's not required and does not exist.
    Raise ConfItemError if it's required but does not exist.
    """
    confitem = None
    try:
        # try the user conf dict first
        confitem = eval('user_confdict{0}'.format(key))
    except KeyError:
        log.debug('user conf does not have {0} in user_confdict'.format(key))
    if confitem is None:
        try:
            # fall back to the default conf dict
            confitem = eval('default_dict{0}'.format(key))
            log.info('{0} will use default value:{1}'.format(key, confitem))
        except KeyError:
            log.warn('default conf does not have {0}'.format(key))
    if confitem is None and required:
        raise ConfItemError('{0} should exist'.format(key))
    return confitem
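Since the key is spliced into an eval expression against both dicts, it is presumably a bracketed index string rather than a plain key name. A minimal usage sketch, with made-up dict contents for illustration:

user_conf = {'control': {'port': 8089}}
default_conf = {'control': {'port': 8000, 'interface': 'eth0'}}

# the user value wins when present
port = check_and_load_existence(user_conf, default_conf, "['control']['port']")

# falls back to the default when the user conf lacks the key
iface = check_and_load_existence(
    user_conf, default_conf, "['control']['interface']", required=True)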
def _do_read(self, context):
    sock = context.get_sock()
    data = None
    context.move2recving_msg()
    while self._stopsign is not True:
        try:
            data = sock.recv(self.NET_RW_SIZE)
        except socket.error as error:
            err = error.args[0]
            if err == errno.EAGAIN:
                log.debug('EAGAIN happend, peer info %s' %
                          context.get_context_info())
                return context
            elif err == errno.EWOULDBLOCK:
                log.info('EWOULDBLOCK happend, context info %s' %
                         context.get_context_info())
                return context
            else:
                log.debug('Socket error happend, error:%s, peer info %s' %
                          (str(error), context.get_context_info()))
                context.to_destroy()
                return context
        except Exception as error:
            log.critical('Socket error happend, error:%s, peer info %s' %
                         (str(error), context.get_context_info()))
            context.to_destroy()
            return context
        data_len = len(data)
        if data_len == 0:
            # socket closed by peer
            context.to_destroy()
            return context
        context.do_recv_data(data, data_len)
        del data
def put(self, destfile, localfile):
    """
    :param destfile:
        ftp path for the localfile
    :param localfile:
        localfile
    """
    ret = {'returncode': 0, 'msg': 'success'}
    log.info('to put localfile {0} to ftp {1}'.format(localfile, destfile))
    self._check_timeout()
    cwd = self._ftp_con.pwd()
    destdir = None
    destfile = os.path.normpath(destfile)
    destfile = self._get_relative_path(destfile, cwd)
    rindex = destfile.rfind('/')
    if rindex < 0:
        destdir = cwd
        file_name = destfile
    elif rindex >= (len(destfile) - 1):
        raise ValueError('value error, destfile {0}'.format(destfile))
    else:
        destdir = destfile[:rindex]
        file_name = destfile.split('/')[-1]
    log.info('put localfile {0} into ftp {1}'.format(localfile, destfile))
    with open(localfile, 'rb') as fhandle:
        try:
            self._ftp_con.cwd(destdir)
            ftp_cmd = 'STOR {0}'.format(file_name)
            self._ftp_con.storbinary(ftp_cmd, fhandle)
        except Exception as error:
            ret['returncode'] = -1
            ret['msg'] = 'failed to put, err:{0}'.format(error)
        self._ftp_con.cwd(cwd)
    return ret
def _do_write(self, context):
    """write into the interface sending buffer"""
    sock = context.get_sock()
    msg = context.try_move2next_sending_msg()
    if msg is None:
        log.debug('send queue is empty, quit the _do_write thread')
        return context
    # log.debug('To enter write loop until eagain')
    # pylint:disable=w0212
    while not self._stopsign:
        data = msg.get_write_bytes(self.NET_RW_SIZE)
        log.debug('msg get_write_bytes_len to be sent: %d' % len(data))
        try:
            succ_len = sock.send(data)
            msg.seek_write(succ_len)
        except cuperr.AsyncMsgError as error:
            log.debug('has seeked out of the msg len, continue')
        except socket.error as error:
            err = error.args[0]
            if err == errno.EAGAIN:
                log.debug(
                    'EAGAIN happend, context info %s' %
                    context.get_context_info()
                )
                return context
            elif err == errno.EWOULDBLOCK:
                log.debug(
                    'EWOULDBLOCK happend, context info %s' %
                    context.get_context_info()
                )
                return context
            else:
                log.warn(
                    'Socket error happend. But it is not EAGAIN, error:%s, '
                    'context info %s, errno:%s' %
                    (str(error), context.get_context_info(), err)
                )
                context.to_destroy()
                break
        except Exception as error:
            log.error(
                'Socket error happend, error:%s, context info %s, trace:%s' %
                (str(error), context.get_context_info(),
                 traceback.format_exc())
            )
            context.to_destroy()
            break
        finally:
            del data
        if msg.is_msg_already_sent():
            log.info(
                'sent out a msg uniqid:{0}'.format(
                    async_msg.netmsg_tostring(msg))
            )
            # if we have successfully sent out a msg, move to the next one
            msg = context.try_move2next_sending_msg()
            if msg is None:
                break
    return context
def stop(self, force_stop=False):
    """stop the connection manager"""
    log.info('to stop the connection manager')
    self._stopsign = True
    self._async_stop(force_stop)
    log.info('connection manager stopped')
def stop(self):
    """stop the connection manager"""
    log.info('to stop the connection manager')
    self._stopsign = True
    self._thdpool.stop()
    log.info('connection manager stopped')
def run(self):
    """run the msgcenter"""
    if not self.setup():
        return False
    thd_conn_man = threading.Thread(target=self._run_conn_manager, args=())
    thd_conn_man.start()
    thd_stat = threading.Thread(target=self.dump_stat, args=())
    thd_stat.start()
    # if self._check_flag == CHECK_ON:
    #     cup.log.info('start run check msg transfer thread.')
    #     self._run_conn_msg_check_loop()
    ind = 0
    msg_ackflag = async_msg.MSG_FLAG2NUM['FLAG_ACK']
    while not self._stop:
        msg = self._conn_mgr.get_recv_msg()
        if ind >= 10000:
            recv_queue = self._conn_mgr.get_recv_queue()
            cup.log.info('recv queue size:{0}'.format(recv_queue.qsize()))
            ind = 0
        if msg is not None:
            # msg_addr = msg.get_to_addr()[0]
            # msg_ip = msg_addr[0]
            # msg_port = msg_addr[1]
            # uniq_id = msg.get_uniq_id()
            # msg_key = str(msg_ip) + '_' + str(msg_port) + '_' + str(uniq_id)
            # cup.log.info('msg[{0}] is already sent'.format(msg_key))
            # if msg.get_msg_type() == self._type_man.getnumber_bytype('HEART_BEAT') or \
            #         msg.get_msg_type() == self._type_man.getnumber_bytype('ACK_HEART_BEAT'):
            #     cup.log.info('get heart_beat msg')
            # elif msg.get_msg_type() == self._type_man.getnumber_bytype('ACK_OK'):
            #     cup.log.info('recv ack ok msg')
            #     self._conn_mgr.push_msg2needack_queue(msg)
            # elif msg.get_msg_type() == self._type_man.getnumber_bytype('NEED_ACK'):
            #     msg_to_addr = msg.get_to_addr()
            #     msg_from_addr = msg.get_from_addr()
            #     msg_uniq_id = msg.get_uniq_id()
            #     self._post_ackok_msg(msg_from_addr, msg_to_addr, msg_uniq_id)
            # cup.log.info('handle msg in msgcenter run')
            # self.handle(msg)
            log.info(
                'msg received, type:%d, flag:%d, from:%s, uniqid:%d' % (
                    msg.get_msg_type(), msg.get_flag(),
                    str(msg.get_from_addr()), msg.get_uniq_id()
                )
            )
            ind += 1
            if msg_ackflag & msg.get_flag() == msg_ackflag:
                self._conn_mgr.push_msg2needack_queue(msg)
            # else:
            self.handle(msg)
        msg = None
    return True
def adjust_judge_lost_time(self, time_in_sec):
    """adjust judge_lost_in_sec"""
    log.info(
        'heartbeat service judge_lost_in_sec changed, old %d, new %d' %
        (self._judge_lost, time_in_sec))
    self._judge_lost = time_in_sec
    return
def __init__(self):
    super(self.__class__, self).__init__(
        b_logstd=False
    )
    log.info('Start to run ' + str(__file__))
    self._hb = heartbeat.HeartbeatService(
        judge_lost_in_sec=5, keep_lost=True
    )
    self._tmpfile = _NOW_PATH + '_tmp_file'
def stop(self):
    """stop the service"""
    log.info('to stop the arrow agent')
    self._status.set_status(self._status.STOPPING)
    self._executor.stop()
    msgcenter.IMessageCenter.stop(self)
    self._status.set_status(self._status.STOPPED)
def signal_handler(sig, _):
    """
    signal handler for the master. When this process receives
    the SIGTERM signal, it starts the stopping procedure.
    """
    if sig == signal.SIGTERM:
        log.info('get SIGTERM, to stop arrow master')
        master = Master(None)
        master.signal_handler()
def signal_handler(sig, _):
    """
    signal handler for the agent. When this process receives
    the SIGTERM signal, it starts the stopping procedure.
    """
    if sig == signal.SIGTERM:
        log.info('get SIGTERM, to stop arrow agent')
        agent = Agent(None)
        agent.signal_handler()
def adjust_judge_lost_time(self, time_in_sec):
    """adjust judge_lost_in_sec"""
    log.info(
        'heartbeat service judge_lost_in_sec changed, old %d, new %d' % (
            self._judge_lost, time_in_sec
        )
    )
    self._judge_lost = time_in_sec
    return
def __init__(self, conf_file):
    # load conf
    self._load_conf(conf_file)
    ipaddr = net.get_hostip()
    port = int(self._conf_dict['control']['port'])
    # control service which controls msg sending and receiving
    self._control_service = control.ControlService(
        ipaddr, port, self._conf_dict)
    log.info('ip:{0}, port:{1}'.format(ipaddr, port))
    self._stop_heart_beat = False
def set_current_logid(self, logid):
    """reset current log id"""
    if logid < 0:
        raise ValueError('cannot setup logid less than 0')
    self._logid = logid
    fname = self._get_next_logfile(self._logid)
    if not self._stream_wbopen(fname):
        log.error('failed to open stream, return False')
        return False
    log.info('reset current log id to {0}'.format(logid))
    return True
def stop(self, force_stop=False):
    """stop the message center"""
    log.info('To stop the msgcenter')
    self._conn_mgr.stop(force_stop)
    self._stop = True
    self._stat_cond.acquire()
    self._stat_cond.notify()
    self._stat_cond.release()
    log.info('msgcenter stopped')
def set_current_logid(self, logid):
    """reset current log id"""
    if logid < 0:
        raise ValueError('cannot setup logid less than 0')
    self._logid = logid
    fname = self.get_next_logfile(self._logid)
    if not self._stream_open(fname):
        log.error('failed to open stream, return False')
        return False
    log.info('reset current log id to {0}'.format(logid))
    return True
def dump_stats(self, print_stdout=False):
    """
    dump the current threadpool stats to the log
    (and to stdout if print_stdout is True).
    The stats come from the get_stats function.
    """
    stat = self.get_stats()
    if print_stdout:
        print stat
    log.info('ThreadPool Stat %s: %s' % (self._name, stat))
    log.debug('queue: %s' % self._jobqueue.queue)
    log.debug('waiters: %s' % self._waiters)
    log.debug('workers: %s' % self._working)
    log.debug('total: %s' % self._threads)
    return stat
def _check_need_new_logfile(self):
    """check whether a new log file is needed"""
    if os.path.exists(self._loglist_switched) and self._logfile_switching:
        try:
            os.rename(self._logfile_listnew, self._logfile_list)
            os.remove(self._loglist_switch)
            os.remove(self._loglist_switched)
        # pylint: disable=W0703
        except Exception as err:
            log.error('failed to rename loglist, old:{0} new:{1}'.format(
                self._loglist_switched, self._loglist_switch))
            return False
    if os.path.exists(self._loglist_switch) and \
            (not self._logfile_switching):
        self._loglist_stream.write('NEED_SWITCH_LOCALFILE\n')
        self._loglist_stream.flush()
        os.fsync(self._loglist_stream)
        self._loglist_stream.close()
        self._logfile_switching = True
        if not os.path.exists(self._logfile_listnew):
            try:
                os.mknod(self._logfile_listnew)
            # pylint: disable=W0703
            except Exception as err:
                log.error('switch loglist file failed:{0}'.format(err))
                return False
        self._loglist_stream = open(self._logfile_listnew, 'a')
    if self._current_filesize >= self._max_log_file_size:
        # log.info('serilizer file needs moving to a new one')
        last_logid = self._writestream.name.split('.')[-1]
        newname = os.path.normpath('{0}/done.{1}'.format(
            os.path.dirname(self._writestream.name), last_logid
        ))
        log.info(
            'finish one log file, logid range:{0}-{1}'.format(
                last_logid, self._logid - 1
            )
        )
        os.rename(self._writestream.name, newname)
        self._stream_close()
        self._loglist_stream.write('{0}\n'.format(newname))
        self._loglist_stream.flush()
        os.fsync(self._loglist_stream.fileno())
        self._current_filesize = 0
        # log.info('next logid:{0}'.format(self._logid))
        fname = self.get_next_logfile(self._logid)
        if not self._stream_open(fname):
            return False
    return True
def _handle_new_conn(self, newsock, peer):
    self._mlock.acquire()
    self._set_sock_params(newsock)
    self._set_sock_nonblocking(newsock)
    context = CConnContext()
    context.set_sock(newsock)
    context.set_conn_man(self)
    context.set_peerinfo(peer)
    self._epoll.register(
        newsock.fileno(),
        select.EPOLLIN | select.EPOLLET | select.EPOLLERR
    )
    self._fileno2context[newsock.fileno()] = context
    self._peer2context[peer] = context
    self._context2fileno_peer[context] = (newsock.fileno(), peer)
    log.info('a new connection: %s:%s' % (peer[0], peer[1]))
    self._mlock.release()
def __init__(self, delay_exe_thdnum=3, queue_exec_thdnum=4):
    self.__toal_thdnum = delay_exe_thdnum + queue_exec_thdnum
    self.__delay_exe_thdnum = delay_exe_thdnum
    self.__queue_exe_thdnum = queue_exec_thdnum
    self.__delay_queue = queue.PriorityQueue()
    self.__exec_queue = queue.PriorityQueue()
    self.__thdpool = threadpool.ThreadPool(
        self.__toal_thdnum, self.__toal_thdnum
    )
    self.__status = 0  # 0 inited, 1 running, 2 stopping
    log.info(
        'Executor service inited, delay_exec thread num:%d,'
        ' exec thread num:%d' % (delay_exe_thdnum, queue_exec_thdnum)
    )
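Based on how the service is used elsewhere in these examples (executor.ExecutionService(), run()/stop(), and delay_exec(seconds, func, urgency=...)), a minimal usage sketch might look like the following; the exact signatures and the target function are assumptions and should be checked against cup.services.executor.

from cup.services import executor

exe_service = executor.ExecutionService(delay_exe_thdnum=3, queue_exec_thdnum=4)
exe_service.run()


def ping(target):
    # placeholder task for illustration
    print('pinging {0}'.format(target))


# run ping('master') roughly 10 seconds from now
exe_service.delay_exec(10, ping, urgency=executor.URGENCY_HIGH, target='master')

# wait for workers to finish their tasks before stopping
exe_service.stop(wait_workerstop=True)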
def _do_check_dead_agent(self):
    lost = self._heartbeat_service.get_lost()
    # schedule the next dead-agent check
    # status 2 == stopping
    if self._status != 2:
        self._executor.queue_exec(
            settings.ARROW_MASTER_DEFAULT_PARAMS['check_heartbeat_interval'],
            self._do_heartbeat, 1, None
        )
    else:
        log.info(
            'ControlService is stopping. Check dead agent service '
            'exited'
        )
def bind(self):
    """bind the ip:port"""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self._set_sock_params(sock)
    sock.bind((self._bind_ip, self._bind_port))
    self._set_sock_nonblocking(sock)
    log.info(
        'bind port info:(ip:%s, port:%s)' % (
            self._bind_ip, self._bind_port
        )
    )
    self._epoll.register(
        sock.fileno(),
        select.EPOLLIN | select.EPOLLET | select.EPOLLOUT | select.EPOLLERR
    )
    self._bind_sock = sock
def stop(self, wait_workerstop=True):
    """
    stop the executor service.

    :param wait_workerstop:
        If wait_workerstop is True, the function will hang until all
        workers finish their tasks.

        Otherwise, the function will not hang, but just tell you whether
        it has stopped successfully (True for stopped, False for not
        stopped yet).
    """
    log.info('to stop executor')
    self.__status = 2
    if wait_workerstop:
        self.__thdpool.stop()
    else:
        self.__thdpool.try_stop()
    log.info('end stopping executor')
def _do_read(self, context):
    sock = context.get_sock()
    data = None
    context.get_recving_msg()
    while self._stopsign is not True:
        try:
            data = sock.recv(self.NET_RW_SIZE)
        except socket.error as error:
            err = error.args[0]
            if err == errno.EAGAIN:
                log.debug(
                    'EAGAIN happend, peer info %s' %
                    context.get_context_info()
                )
                return context
            elif err == errno.EWOULDBLOCK:
                log.info(
                    'EWOULDBLOCK happend, context info %s' %
                    context.get_context_info()
                )
                return context
            else:
                log.warn(
                    'Socket error happend, error:%s, peer info %s' %
                    (str(error), context.get_context_info())
                )
                context.to_destroy()
                return context
        except Exception as error:
            log.critical(
                'Socket error happend, error:%s, peer info %s' %
                (str(error), context.get_context_info())
            )
            context.to_destroy()
            return context
        data_len = len(data)
        if data_len == 0:
            # socket closed by peer
            context.to_destroy()
            return context
        context.do_recv_data(data, data_len)
        del data
def poll(self):
    """start to poll"""
    self._thdpool.start()
    misc.check_not_none(self._bind_sock)
    self._bind_sock.listen(10)
    while not self._stopsign:
        try:
            events = self._epoll.poll(1)
        except IOError as err:
            if err.errno == errno.EINTR:
                return
            raise err
        # log.debug('start to poll')
        for fileno, event in events:
            # if it comes from the listen port, it is a new conn
            if fileno == self._bind_sock.fileno():
                newsock, addr = self._bind_sock.accept()
                self._handle_new_conn(newsock, addr)
            elif event & select.EPOLLIN:
                try:
                    self._handle_new_recv(self._fileno2context[fileno])
                except KeyError:
                    log.info('socket already closed')
            elif event & select.EPOLLOUT:
                try:
                    self._handle_new_send(self._fileno2context[fileno])
                except KeyError:
                    log.info('socket already closed')
            elif (event & select.EPOLLHUP) or (event & select.EPOLLERR):
                # FIXME: consider if we need to release net msg resources
                if event & select.EPOLLHUP:
                    log.info('--EPOLLHUP--')
                else:
                    log.info('--EPOLLERR--')
                try:
                    self._handle_error_del_context(
                        self._fileno2context[fileno]
                    )
                except KeyError:
                    log.info('socket already closed')
def run(self):
    """
    start the executor:
    Delayexec workers poll the delay queue every 100ms,
    QueueExec workers poll the exec queue every 20ms
    (the check intervals passed to add_1job below)
    """
    self.__thdpool.start()
    self.__status = 1
    for _ in xrange(0, self.__delay_exe_thdnum):
        self.__thdpool.add_1job(
            self.__exec_worker, 0.1, self.__delay_queue, 'Delayexec'
        )
    for _ in xrange(0, self.__queue_exe_thdnum):
        self.__thdpool.add_1job(
            self.__exec_worker, 0.02, self.__exec_queue, 'Exec'
        )
    log.info('Executor service started')
def __exec_worker(self, check_interval, func_queue, worker_name=''):
    while self.__status != 2:
        try:
            item = func_queue.get(timeout=check_interval)
        except queue.Empty:
            log.debug('no item found in exec queue')
            continue
        try:
            _, (function, data) = item
            function(data)
        # pylint: disable=W0703
        # we can NOT predict the exception type
        except Exception as error:
            log.warn(
                '%s worker encountered exception:%s, func:%s, data:%s' %
                (worker_name, error, function, data)
            )
    log.info(
        '%s worker thread exited as the service is stopping' % worker_name
    )
def open4write(self, truncate_last_failure=True):
    """
    open the local log for writing

    :raise Exception:
        if any IOError is encountered, raise IOError(errmsg)
    """
    try:
        if not os.path.exists(self._logfile_list):
            os.mknod(self._logfile_list)
        self._loglist_stream = open(self._logfile_list, 'a')
    except Exception as err:
        log.error('cannot create loglist, raise IOError')
        raise IOError('cannot create loglist, {0}'.format(err))
    log.info(
        'try to recover from the last '
        'write if there is any need, truncate_last_failure:{0}'.format(
            truncate_last_failure)
    )
    self._recover_from_lastwriting(truncate_last_failure)
def refresh(self, key, device_obj=None):
    """
    :param key:
        refresh the device identified by key
    :param device_obj:
        optional device object carrying the latest resource info
    :return:
        refresh the last_healthy time of the device; a device that is
        not registered yet will be added to the heartbeat service
    """
    assert type(key) == str, 'key needs to be a str'
    got_device = self._devices.get(key)
    if got_device is None:
        log.info(
            'New device found:%s. To add it into heartbeat service' % key
        )
        new_device = Device(key)
        new_device.set_last_healthy()
        self._devices[key] = new_device
    else:
        if device_obj is None:
            got_device.set_last_healthy()
            log.info(
                'Heartbeat: Device %s only refreshed with heartbeat. '
                'Resource not refreshed' % key
            )
        else:
            log.info(
                'Heartbeat: Device %s refreshed with resource.' % key
            )
            self._devices[key] = device_obj
            device_obj.set_last_healthy()
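As used in the heartbeat handler earlier in this listing (_on_heartbeat), the key is the peer's 'ip:port' string and the optional device_obj is the deserialized host resource. A minimal usage sketch, assuming the HeartbeatService and LinuxHost shown in these examples and with made-up addresses:

from cup.services import heartbeat

hb_service = heartbeat.HeartbeatService(judge_lost_in_sec=30, keep_lost=True)

# heartbeat only: just refresh the last_healthy timestamp
hb_service.refresh('10.0.0.12:8410')

# heartbeat with a resource payload: replace the stored device object
resource = heartbeat.LinuxHost(name='10.0.0.12:8410')
hb_service.refresh('10.0.0.12:8410', resource)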
def stop(self):
    """stop the master"""
    log.info('to stop the arrow master')
    log.info('to stop control service')
    self._control_service.stop()
    log.info('arrow master stopped')
def _send_heartbeat_loop(self):
    if self._status.get_status() != self._status.RUNNING:
        log.warn('control service will stop. stop sending heartbeat')
        return
    hostinfo = cuphb.LinuxHost(
        str(self._agent_ipport), True,
        self._confdict['control']['interface']
    )
    log.info('to create msg and send msg')
    netmsg = msg.CNetMsg(is_postmsg=True)
    netmsg.set_from_addr(self._agent_ipport, (1, 1))
    netmsg.set_to_addr(self._master_ipport, (1, 1))
    netmsg.set_flag(1)
    netmsg.set_msg_type(self._type_man.getnumber_bytype('HEART_BEAT'))
    netmsg.set_uniq_id(1)
    netmsg.set_body(hostinfo.serilize())
    self.post_msg(netmsg)
    log.info('finish queue sending heartbeat to {0}'.format(
        self._master_ipport))
    self._executor.delay_exec(
        int(self._confdict['control']['heartbeat_interval']) - 3,
        self._send_heartbeat_loop,
        urgency=executor.URGENCY_HIGH
    )
def read(self, context):
    """read with the conn context"""
    if context.is_detroying():
        log.debug('The context is being destroyed. return')
        return
    if not context.try_readlock():
        return
    log.debug('succeeded to acquire readlock, to add the '
              'readjob into the threadpool')
    try:
        self._do_read(context)
        self._finish_read_callback(True, context)
    except Exception as error:
        log.info('read error occurred, error type:{0}, content:{1}'.format(
            type(error), error))
        log.info(traceback.format_exc())
        self._finish_read_callback(False, context)