def kasaya_connection_broken(self, addr):
    """
    Called when the connection with the kasaya daemon is lost.

    Unless the worker is already stopping (status >= 3), it falls back
    to status 1 (waiting for start / reconnect).
    """
    LOG.debug("Connection closed with %s", addr)
    already_stopping = self.status >= 3
    if not already_stopping:
        # back to "waiting for start" state
        self.status = 1
def worker_start_remote(self, worker_id, host_id, address, service):
    """
    Register a worker that started on a remote host and log the event.
    """
    self.DB.worker_register(host_id, worker_id, service, address)
    LOG.info("Remote worker [%s] started, address [%s] [id:%s]" % (service, address, worker_id))
def __load_config(self):
    """
    Load worker configuration from 'service.conf' and return the service name.

    Used only when no service name was given and the daemon was not
    started by the kasaya daemon itself. Exits the process when the
    config file is missing.
    """
    from kasaya.conf import load_worker_settings, set_value
    try:
        config = load_worker_settings("service.conf")
    except IOError:
        import sys
        LOG.critical("File 'service.conf' not found, unable to start service.")
        sys.exit(1)
    # override system settings with values from the config file
    for key, val in config['config'].items():
        set_value(key, val)
    # export worker environment variables (keys are upper-cased)
    for key, val in config['env'].items():
        os.environ[key.upper()] = val
    # service section: name and module to auto-load
    svcc = config['service']
    svname = svcc['name']
    LOG.info("Service config loaded. Service name: %s" % svname)
    # set flag to load tasks automatically
    self.__auto_load_tasks_module = svcc['module']
    return svname
def hearbeat_loop(self):
    """
    Periodically check the last ping time of every locally registered
    worker and unregister workers whose ping is older than
    HEARTBEAT_TIMEOUT + WORKER_HEARTBEAT seconds.

    Runs forever; sleeps WORKER_HEARTBEAT seconds between sweeps.

    NOTE(review): the name looks like a typo of ``heartbeat_loop`` —
    another block in this file spawns ``self.heartbeat_loop``; confirm
    which spelling callers of *this* class use before renaming.
    """
    maxpinglife = timedelta(seconds=settings.HEARTBEAT_TIMEOUT + settings.WORKER_HEARTBEAT)
    unreglist = []
    while True:
        now = datetime.now()
        # items() instead of iteritems() works on both Python 2 and 3;
        # collecting IDs first avoids mutating the dict while iterating
        for ID, nfo in self.__pingdb.items():
            # find outdated timeouts
            deadline = nfo['t'] + maxpinglife
            if deadline < now:
                LOG.warning("Worker [%s] with id [%s] died. Unregistering." % (nfo['s'], ID))
                unreglist.append(ID)
        # unregister all dead workers
        while len(unreglist) > 0:
            ID = unreglist.pop()
            self.worker_stop(ID)
        gevent.sleep(settings.WORKER_HEARTBEAT)
def handle_request(self, message):
    """
    Dispatch a control request to its registered handler.

    message -- decoded message body; must contain 'method', may contain
               'args' and 'kwargs' (filled with empty defaults if absent).
    Raises MethodNotFound when no handler is registered for the method.
    Redirect exceptions from handlers are converted into redirects.
    """
    method = message['method']
    LOG.debug("Management call [%s]" % method)
    try:
        handler = self.__ctltasks[method]
    except KeyError:
        raise exceptions.MethodNotFound("Control method %s not exists" % method)
    # fill missing parameters
    args = message.setdefault('args', [])
    kwargs = message.setdefault('kwargs', {})
    try:
        # call internal function
        return handler(*args, **kwargs)
    except RedirectRequiredToIP as e:
        # redirect to IP
        return self.redirect(self.ip_to_zmq_addr(e.remote_ip), message)
    except RedirectRequiredToAddr as e:
        # redirect to address
        return self.redirect(e.remote_addr, message)
def run_task(self, funcname, args, kwargs):
    """
    Execute a registered worker task and return a serialized result.

    funcname -- name of the task registered in worker_methods_db
    args, kwargs -- positional and keyword arguments for the task

    Returns a RESULT message dict on success, or a serialized exception
    dict on unknown task, timeout or error. Also maintains per-task and
    per-worker success/error/timeout counters.
    """
    # find task in worker db
    try:
        task = worker_methods_db[funcname]
    except KeyError:
        self._tasks_nonex += 1
        LOG.info("Unknown worker task called [%s]" % funcname)
        return exception_serialize_internal('Method %s not found' % funcname)
    # try to run function and catch exceptions
    try:
        LOG.debug("task %s, args %s, kwargs %s" % (funcname, repr(args), repr(kwargs)))
        func = task['func']
        tout = task['timeout']
        if tout is None:
            # call task without timeout
            result = func(*args, **kwargs)
        else:
            # call task with timeout; gevent raises TaskTimeout when exceeded
            with gevent.Timeout(tout, TaskTimeout):
                result = func(*args, **kwargs)
        self._tasks_succes += 1
        task['res_succ'] += 1
        return {'message': messages.RESULT, 'result': result}
    except TaskTimeout as e:
        # timeout exceeded
        self._tasks_error += 1
        task['res_tout'] += 1
        err = exception_serialize(e, internal=False)
        LOG.info("Task [%s] timeout (after %i s)." % (funcname, tout))
        return err
    except Exception as e:
        # exception occured inside the task itself
        self._tasks_error += 1
        task['res_err'] += 1
        err = exception_serialize(e, internal=False)
        LOG.info("Task [%s] exception [%s]. Message: %s" % (funcname, err['name'], err['description']))
        LOG.debug(err['traceback'])
        return err
    finally:
        # close django connection
        # if worker is using Django ORM we must close database connection manually,
        # or each task will leave one unclosed connection. This is done automatically.
        if task['close_djconn']:
            try:
                _close_dj_connection()
            except Exception as e:
                if e.__class__.__name__ == "ImproperlyConfigured":
                    # django connection is not required or django orm is not used at all,
                    # because of that we replace _close_dj_connection function by empty lambda
                    # NOTE(review): 'global' after the name is already used is a
                    # SyntaxError on Python 3 — this block is Python-2-only as written.
                    global _close_dj_connection
                    _close_dj_connection = lambda: None
def on_remote_kasayad_start(self, host_id, addr):
    """
    Handle notification that a remote kasaya host has started:
    register it in the network state database and log the event.
    """
    # register the remote host in database
    self.DB.host_register(host_id, addr)
    LOG.info("Remote kasaya daemon started, address: %s [id:%s]" % (addr, host_id))
def local_services():
    """
    Scan settings.LOCAL_WORKERS_DIR and return a dict mapping service
    name -> Service for every directory containing a service config file.
    Directories without a config, services with unimplemented modes and
    duplicate names are skipped (with an error logged for the latter two).
    """
    # (a block registering internal services — asyncd, transactiond,
    # authd, logd — used to live here; it is currently disabled)
    result = {}
    dname = os.path.abspath(settings.LOCAL_WORKERS_DIR)
    if not os.path.exists(dname):
        return result
    for sdir in os.listdir(dname):
        fp = os.path.join(dname, sdir)
        cnf = os.path.join(fp, SERVICE_CONFIG_NAME)
        if not os.path.exists(cnf):
            # no config file -> not a service directory
            continue
        try:
            svc = Service(fp)
        except UnknownServiceMode as e:
            LOG.error("Service [%s] requires mode [%s] which is unimplemented. Ignoring." % (e.service, e.mode))
            continue
        if svc.name in result:
            LOG.error("Found more than one service with name [%s]. Ignoring." % svc.name)
            continue
        result[svc.name] = svc
    return result
def worker_start_local(self, worker_id, address, service, pid):
    """
    Register a worker that started on this host (initially offline)
    and emit the 'worker-local-wait' signal for it.
    """
    self.DB.worker_register(self.DAEMON.ID, worker_id, service, address, pid, False)
    LOG.info("Local worker [%s] started, address [%s] [id:%s]" % (service, address, worker_id))
    # emit signal
    emit("worker-local-wait", worker_id)
def close(self):
    """
    Notifies network about shutting down, closes database and all used sockets.
    """
    LOG.info("Stopping local kasaya daemon")
    # tell the rest of the network we are going down
    self.on_local_kasayad_stop(self.ID, local=True)
    # shut down sub-components: sync worker, database, broadcaster
    self.WORKER.close()
    self.DB.close()
    self.BC.close()
def stop(self):
    """
    Stop the worker: disable the heartbeat loop, notify kasaya about
    shutdown, stop the message loop and kill all spawned greenlets.
    """
    self.__hbloop = False
    self.status = 3
    LOG.debug("Sending stop notification. Address [%s]" % self.loop.address)
    self.SYNC.notify_worker_stop()
    self.loop.stop()
    # killing greenlets (block until each one is dead)
    for green in self.__greens:
        green.kill(block=True)
    LOG.debug("Worker [%s] stopped" % self.servicename)
def CTL_start(self):
    """
    Set status of worker as running. This allow to process tasks.
    Returns True when the transition happened, False otherwise.
    """
    if self.status != 1:
        return False
    self.status = 2
    LOG.info("Received status: running.")
    # call tasks after worker started listening
    g = gevent.Greenlet(self._worker_listening)
    g.start()
    return True
def register_raw_task(self, message_type, raw_responce, func):
    """
    Register a raw-message handler. Raw tasks extend the kasaya protocol
    with special message types and are used internally by kasaya's own
    daemons.
    """
    self._raw_tasks[message_type] = {'func': func, 'raw_resp': raw_responce}
    LOG.debug("Registered raw task handler %s -> %s" % (message_type, func.__name__))
def __init__(self): super(KasayaDaemon, self).__init__(is_host=True) # event handlers add_event_handler("host-join", self.on_remote_kasayad_start) add_event_handler("host-leave", self.on_remote_kasayad_stop) self.hostname = system.get_hostname() LOG.info("Starting local kasaya daemon with ID: [%s]" % self.ID) self.DB = NetworkStateDB() # database self.BC = UDPBroadcast(self.ID) # broadcaster self.SYNC = Synchronizer(self.DB, self.ID) # synchronisation self.WORKER = SyncWorker(server=self, database=self.DB) self.BC.set_own_ip(self.WORKER.own_ip)
def notify_kasayad_refresh(self, ID, services=None, local=False):
    """
    Received information on host changes.

    ID -- id of the host whose service list changed
    services -- new list of services (or None)
    local -- True when the change happened on this host
    """
    # guard: without this, a None services list would crash the log
    # calls below with a NameError
    slst = ", ".join(services) if services is not None else ""
    if local:
        # local changes require broadcast of the new service status
        self.BC.send_host_refresh(self.ID, services=services)
        LOG.info("Local service list changed [%s]" % slst)
    else:
        # remote host services require a database update; local updates
        # are entered to database before notify_kasayad_refresh is called.
        # BUGFIX: update the service list of the *remote* host (ID),
        # not our own (self.ID)
        self.DB.service_update_list(ID, services)
        LOG.info("Remote host service list changed [%s]" % slst)
def run(self):
    """
    Main entry point of the worker: load task modules (unless skipped),
    spawn the message loop and heartbeat greenlets, and clean up when
    they finish.
    """
    if not self.__skip_loading_modules:
        self.__load_modules()
    self.status = 1
    LOG.debug("Service [%s] starting." % self.servicename)
    # before run...
    self._worker_just_started()
    self.__greens = [
        gevent.spawn(self.loop.loop),
        gevent.spawn(self.heartbeat_loop),
    ]
    try:
        gevent.joinall(self.__greens)
    finally:
        self.stop()
        self.close()
    # just finished working
    self._worker_just_stopped()
def worker_prepare(self, worker_id):
    """
    After start, worker is in offline state. It needs to be configured
    and only then can it be activated to be online. This function does
    all required setup, and when the worker is online it broadcasts the
    new worker in the network.
    """
    wrknfo = self.DB.worker_get(worker_id)
    # all configuration of the worker should happen here (currently none)
    # send information to worker to start processing tasks
    msg = {'message': messages.CTL_CALL, 'method': 'start'}
    send_and_receive_response(wrknfo['addr'], msg)
    LOG.debug("Local worker [%s] on [%s] is now online" % (wrknfo['service'], wrknfo['addr']))
    # broadcast new worker state
    self.DB.worker_set_state(worker_id, True)
def notify_kasayad_start(self, ID, hostname, ip, services, local=False):
    """
    Send information about startup of host to all other hosts in network.

    ID/hostname/ip/services -- identity of the starting host
    local -- True when it is this host starting (then we broadcast),
             False when the information arrived via broadcast.
    """
    isnew = self.DB.host_register(ID, hostname, ip, services)
    if local:
        # it is ourself starting, send broadcast to other kasaya daemons
        self.BC.send_host_start(ID, hostname, ip, services)
    # BUGFIX: this branch is for a *newly seen remote* kasayad only —
    # without the `not local` guard, our own (always new) registration
    # would be logged as remote and trigger a pointless self re-broadcast.
    if isnew and not local:
        # it's a remote host starting, information is from broadcast
        LOG.info("Remote kasaya daemon [%s] started, address [%s], ID [%s]" % (hostname, ip, ID))
        # A newly registered remote kasayad doesn't know the other hosts
        # yet, so we send registering information about our own instance
        # again after a short delay.
        gevent.sleep(0.5)
        self.notify_kasayad_self_start()
def register_task(self, name, func, timeout, anonymous, permissions, close_dj_conn):
    """
    Register a task in the worker task database.

    name -- task name (defaults to the function's __name__)
    func -- callable implementing the task
    timeout -- optional integer timeout in seconds (must be >= 0)
    anonymous -- whether the task may run without authorisation
    permissions -- permissions required to call the task
    close_dj_conn -- close django DB connection after each call
    Raises on duplicate names (unless it's the same function) and on
    invalid timeouts.
    """
    # derive task name from the function when not given explicitly
    if name is None:
        name = func.__name__
    if name in self.db:
        if func == self.db[name]['func']:
            # same function registered twice under same name: no-op
            return
        c = "Task %s is already registered" % name
        LOG.critical(c)
        raise Exception(c)
    # validate timeout
    if timeout is not None:
        if type(timeout) != int:
            raise Exception("Timeout must be integer value")
        if timeout < 0:
            raise Exception("Timeout cannot be negative value")
    # normalized docstring
    doc = func.__doc__
    if doc is not None:
        doc = doc.strip()
    self.db[name] = {
        'func': func,
        'doc': doc,                      # docstring
        'timeout': timeout,              # timeout in seconds
        'anon': anonymous,               # can task be executed without authorisation
        'perms': permissions,            # permissions required to call task
        'close_djconn': close_dj_conn,   # close django connection on exit
        'res_succ': 0,                   # successful calls
        'res_err': 0,                    # error finishing calls
        'res_tout': 0,                   # timed out calls
    }
    LOG.debug("Registered task %s" % name)
def __init__(self, silentinit=False):
    """
    Configure the service-bus serializer: binary header format,
    optional encryption, bus-name padding and transport protocol.

    silentinit -- when True, skip the final debug log line.
    """
    # binary header
    import struct
    #
    # header format
    # ! big-endian
    # 6s - 6 characters of header prefix
    # h - protocol version (=1)
    # L - unsigned long - data size
    # 16s - 16 bytes of initial vector (used for encryption)
    # ? - boolean - compression flag
    # H - unsigned short - how many bytes trim from data
    # ? - boolean - response required
    #
    self.header = struct.Struct(b"!6s h L 16s ? H ?")
    self._version = 1
    # encrypted or not... select the (de)serialize implementations
    if settings.ENCRYPTION:
        # encrypter, decrypter
        from kasaya.core.protocol.encryption import encrypt, decrypt
        self.encrypt = encrypt
        self.decrypt = decrypt
        self.serialize = self._encrypted_serialize
        self.deserialize = self._encrypted_deserialize
        self._passwd = _load_passwd()
    else:
        self.serialize = self._plain_serialize
        self.deserialize = self._plain_deserialize
    # servicebus name: pad to exactly 6 bytes with spaces (py2/py3 differ
    # in how a bytes object is built from the settings string)
    import sys
    py3 = sys.version_info >= (3, 0)
    if py3:
        busname = bytes(settings.SERVICE_BUS_NAME, "ascii")
        busname += b" " * (6 - len(busname))
    else:
        busname = str(settings.SERVICE_BUS_NAME)
        busname += b" " * (6 - len(busname))
    self._busname = busname
    # transport protocol: pick the matching bin<->data converter pair
    if settings.TRANSPORT_PROTOCOL == "pickle":
        from kasaya.core.protocol.transport.tr_pickle import bin_2_data, data_2_bin
    elif settings.TRANSPORT_PROTOCOL == "bson":
        if py3:
            # python 3 bson
            from kasaya.core.protocol.transport.tr_bson3 import bin_2_data, data_2_bin
        else:
            # python 2 bson
            from kasaya.core.protocol.transport.tr_bson2 import bin_2_data, data_2_bin
    elif settings.TRANSPORT_PROTOCOL == "msgpack":
        from kasaya.core.protocol.transport.tr_msgpack import bin_2_data, data_2_bin
    else:
        raise Exception("Unsupported transport protocol %s" % settings.TRANSPORT_PROTOCOL)
    self.bin_2_data = bin_2_data
    self.data_2_bin = data_2_bin
    if silentinit:
        return
    LOG.debug("Service bus is configured to use %s as transport protocol." % settings.TRANSPORT_PROTOCOL)
set_value(k, v) # setup logging set_value("LOG_TO_FILE", "1") set_value("LOGGER_NAME", servicename) set_value( "LOG_FILE_NAME", os.environ.get('SV_LOG_FILE', "/tmp/service_%s.log" % servicename)) from kasaya.core.lib.logger import stdLogOut from kasaya.core.lib import LOG # redirect stdout and stderr to log sys.stdout = stdLogOut(LOG, "DEBUG") sys.stderr = stdLogOut(LOG, "ERROR") LOG.stetupLogger() if kasayad_mode: # starting kasaya daemon from kasaya.workers.kasayad import KasayaDaemon daemon = KasayaDaemon() daemon.run() else: # starting regular worker from kasaya import WorkerDaemon cwd = os.getcwd() if not cwd in sys.path: sys.path.append(cwd) __import__(module) worker = WorkerDaemon(servicename, False) worker.run()
def connection_handler(self, SOCK, address):
    """
    Receive loop for a single incoming connection.

    Deserializes messages from SOCK, dispatches them to registered
    handlers from self._msgdb, and sends results (or serialized
    exceptions) back when the sender requested a response (resreq).
    Returns when the peer closes the connection.
    """
    ssid = None  # session id of this connection, set by SET_SESSION_ID
    while True:
        try:
            msgdata, resreq = _receive_and_deserialize(SOCK, self.serializer)
        except (NoData, ConnectionClosed):
            return
        try:
            msg = msgdata['message']
        except KeyError:
            # malformed message: acknowledge if needed, then skip
            if resreq:
                self._send_noop(SOCK)
            LOG.debug("Decoded message is incomplete. Message dump: %s" % repr(msgdata))
            continue
        # message SET_SESSION_ID is special message
        # it never return reply and is not propagated to handlers
        if msg == messages.SET_SESSION_ID:
            try:
                ssid = msgdata['id']
                #print("conn session id" , address, ssid)
            except KeyError:
                pass
            if resreq:
                self._send_noop(SOCK)
            continue
        # find message handler
        try:
            handler, rawmsg = self._msgdb[msg]
        except KeyError:
            # unknown messages are ignored
            if resreq:
                self._send_noop(SOCK)
            LOG.warning("Unknown message received [%s]" % msg)
            LOG.debug("Message body dump:\n%s" % repr(msgdata))
            continue
        # run handler
        try:
            result = handler(msgdata)
        except Exception as e:
            result = exception_serialize(e, False)
            LOG.info("Exception [%s] when processing message [%s]. Message: %s." % (result['name'], msg, result['description']))
            #LOG.debug("Message dump: %s" % repr(msgdata) )
            #LOG.debug(result['traceback'])
            if not resreq:
                # if response is not required, then don't send exceptions
                continue
            # NOTE(review): the exception is serialized a second time here
            # even though `result` already holds the serialized form
            _serialize_and_send(
                SOCK,
                self.serializer,
                exception_serialize(e, False),
                resreq=False,  # response never require another response
            )
            continue
        # response is not expected, throw result and back to loop
        if not resreq:
            continue
        try:
            # send result: raw handlers provide the full response message
            # themselves; others are wrapped in a RESULT envelope
            if rawmsg:
                _serialize_and_send(
                    SOCK,
                    self.serializer,
                    result,
                    resreq=False,
                )
            else:
                _serialize_and_send(SOCK, self.serializer, {
                    "message": messages.RESULT,
                    "result": result,
                }, resreq=False)
        except ConnectionClosed:
            return
def worker_stop_local(self, worker_id):
    """
    Unregister a worker that stopped on this host and log the event.
    """
    self.DB.worker_unregister(ID=worker_id)
    LOG.info("Local worker stopped [id:%s]" % worker_id)
def kasaya_connection_started(self, addr):
    """
    Called when a connection with the kasaya daemon is established;
    reports the current worker status to it.
    """
    LOG.debug("Connected to %s", addr)
    self.SYNC.notify_worker_live(self.status)
def worker_stop_remote(self, worker_id):
    """
    Unregister a worker that stopped on a remote host and log the event.
    """
    self.DB.worker_unregister(ID=worker_id)
    LOG.info("Remote worker stopped [id:%s]" % worker_id)
def __init__(self, servicename=None, load_config=True, skip_loading_modules=False): super(WorkerDaemon, self).__init__() # config loader if servicename is None: load_config = True if load_config: LOG.info("Loading service.conf") if servicename is None: servicename = self.__load_config() self.servicename = servicename self.__skip_loading_modules = skip_loading_modules # worker status # 0 - initialized # 1 - starting or waiting for reconnect to kasaya # 2 - working # 3 - stopping # 4 - dead self.status = 0 LOG.info("Starting worker daemon, service [%s], ID: [%s]" % (self.servicename, self.ID)) adr = "tcp://%s:%i" % (settings.BIND_WORKER_TO, settings.WORKER_MIN_PORT) self.loop = MessageLoop(adr, settings.WORKER_MAX_PORT) add_event_handler("sender-conn-closed", self.kasaya_connection_broken) add_event_handler("sender-conn-started", self.kasaya_connection_started) self.SYNC = KasayaLocalClient(autoreconnect=True, sessionid=self.ID) self.SYNC.setup(servicename, self.loop.address, self.ID, os.getpid()) LOG.debug("Binded to socket [%s]" % (",".join(self.loop.binded_ip_list()))) # registering handlers self.loop.register_message(messages.SYNC_CALL, self.handle_sync_call, raw_msg_response=True) self.loop.register_message(messages.CTL_CALL, self.handle_control_request) # heartbeat self.__hbloop = True #exposing methods self.exposed_methods = [] # control tasks self.ctl = ControlTasks() self.ctl.register_task("stop", self.CTL_stop) self.ctl.register_task("start", self.CTL_start) self.ctl.register_task("stats", self.CTL_stats) self.ctl.register_task("tasks", self.CTL_methods) # stats #self._sb_errors = 0 # internal service bus errors self._tasks_succes = 0 # succesfully processed tasks self._tasks_error = 0 # task which triggered exceptions self._tasks_nonex = 0 # non existing tasks called self._tasks_control = 0 # control tasks received self._start_time = datetime.datetime.now() # time of worker start
def loop(self):
    """
    UDP broadcast receive loop: reads datagrams, filters out our own
    broadcasts, deserializes them and dispatches to handlers in
    self._msgdb. Handler exceptions are logged, never propagated.

    NOTE(review): uses Python-2-only constructs (`unicode`, `e.message`,
    str.encode("hex")) — this method will not run unchanged on Python 3.
    """
    while self.is_running:
        # receive data
        msgdata, addr = self.SOCK.recvfrom(4096)
        # skip own broadcast messages
        if addr[0] == self.own_ip:
            continue
        # deserialize
        try:
            msgdata, repreq = self.serializer.deserialize(msgdata)
        except NotOurMessage:
            continue
        except Exception:
            LOG.warning("Message from broadcast deserialisation error")
            LOG.debug("Broken message body dump in hex (only first 1024 bytes):\n%s" % msgdata[:1024].encode("hex"))
            continue
        # own broadcast from another interface (matched by sender id)
        try:
            if msgdata['__sid__'] == self.ID:
                continue
        except KeyError:
            continue
        # message type
        try:
            msg = msgdata['message']
        except KeyError:
            LOG.debug("Decoded message is incomplete. Message dump: %s" % repr(msgdata))
            continue
        # find handler
        try:
            handler = self._msgdb[msg]
        except KeyError:
            # unknown messages are ignored silently
            LOG.warning("Unknown message received [%s]" % msg)
            LOG.debug("Message body dump:\n%s" % repr(msgdata))
            continue
        # run handler
        try:
            handler(msgdata)
        except Exception as e:
            # log exception details
            excname = e.__class__.__name__
            # traceback (decode defensively, fall back to repr)
            tback = traceback.format_exc()
            try:
                tback = unicode(tback, "utf-8")
            except:
                tback = repr(tback)
            # error message (decode defensively, fall back to repr)
            errmsg = e.message
            try:
                errmsg = unicode(errmsg, "utf-8")
            except:
                errmsg = repr(errmsg)
            # log & clean
            LOG.error("Exception [%s] when processing message [%s]. Message: %s." % (excname, msg, errmsg))
            LOG.debug("Message dump: %s" % repr(msgdata))
            LOG.debug(tback)
            del excname, tback, errmsg
def on_remote_kasayad_stop(self, host_id):
    """
    Handle information that a remote kasaya host left the network:
    remove it from the network state database and log the event.
    """
    # BUGFIX: unregister the host that actually left (host_id),
    # not our own local daemon (self.ID)
    self.DB.host_unregister(host_id)
    LOG.info("Remote kasaya daemon stopped, [id:%s]" % host_id)