def __init__(self, bus, objpath, store): dbus.service.Object.__init__(self, bus, objpath) self.store = store #: maps agentid (ex. inject-:1.234) to object path (ex: #: /agent/inject) self.clients = {} self.exiting = False #: locks[domain] is a set of (lockid, selector) whose processing #: has started (might even be finished). Allows several agents that #: perform the same stateless computation to run in parallel self.locks = defaultdict(set) signal.signal(signal.SIGTERM, self.sigterm_handler) #: maps agentids to their names self.agentnames = {} #: maps agentids to their serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agentids to their serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: number of descriptors self.descriptor_count = 0 #: count descriptors marked as processed/processable by each uniquely #: configured agent self.descriptor_handled_count = {} #: uniq_conf_clients[(agent_name, config_txt)] = [agent_id, ...] self.uniq_conf_clients = defaultdict(list) #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject)
def __init__(self, options): Bus.__init__(self) #: stores currently held locks [(lockid, domain, selector)] self.locks = defaultdict(set) #: Next available agent id. Never decreases. self.agent_count = 0 self.store = RAMStorage() # TODO add support for DiskStorage ? # TODO save internal state at bus exit (only useful with DiskStorage) #: maps agentid (ex. inject-12) to agentdesc self.agent_descs = {} #: maps agentid to agent instance self.agents = {} self.threads = [] #: maps agentids to their serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agentids to their serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject)
class DBusMaster(dbus.service.Object, BusMaster): _name_ = "dbus" _desc_ = "Use RabbitMQ to exchange messages" def __init__(self, bus, objpath, store): dbus.service.Object.__init__(self, bus, objpath) self.store = store #: maps agentid (ex. inject-:1.234) to object path (ex: #: /agent/inject) self.clients = {} self.exiting = False #: locks[domain] is a set of (lockid, selector) whose processing #: has started (might even be finished). Allows several agents that #: perform the same stateless computation to run in parallel self.locks = defaultdict(set) signal.signal(signal.SIGTERM, self.sigterm_handler) #: maps agentids to their names self.agentnames = {} #: maps agentids to their serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agentids to their serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: number of descriptors self.descriptor_count = 0 #: count descriptors marked as processed/processable by each uniquely #: configured agent self.descriptor_handled_count = {} #: uniq_conf_clients[(agent_name, config_txt)] = [agent_id, ...] self.uniq_conf_clients = defaultdict(list) #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject) def update_check_idle(self, agent_name, output_altering_options): """ Increases the count of handled descriptors and checks if all descriptors have been handled (processed/marked as processable). In that case, send the "on_idle" message. """ name_config = (agent_name, output_altering_options) self.descriptor_handled_count[name_config] += 1 self.check_idle() def check_idle(self): if self.exiting: return # Check if we have reached idle state nbdistinctagents = len(self.descriptor_handled_count) nbhandlings = sum(self.descriptor_handled_count.values()) if self.descriptor_count * nbdistinctagents == nbhandlings: log.debug( "IDLE: %d agents having distinct (name, config) %d " "descriptors %d handled", nbdistinctagents, self.descriptor_count, nbhandlings) self.on_idle() @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ssos', out_signature='') def register(self, agent_id, agent_domain, pth, config_txt): #: indicates whether another instance of the same agent is already #: running with the same configuration agent_name = agent_id.split('-', 1)[0] self.agentnames[agent_id] = agent_name output_altering_options = get_output_altering_options(str(config_txt)) name_config = (agent_name, output_altering_options) already_running = len(self.uniq_conf_clients[name_config]) > 1 self.uniq_conf_clients[name_config].append(agent_id) self.clients[agent_id] = pth self.agents_output_altering_options[agent_id] = output_altering_options self.agents_full_config_txts[agent_id] = str(config_txt) log.info("New client %s (%s) in domain %s with config %s", pth, agent_id, agent_domain, config_txt) # Send not-yet processed descriptors to the agent... if not already_running: # ...unless another instance of the same agent has already been # started, and should be processing those descriptors unprocessed = \ self.store.list_unprocessed_by_agent(agent_name, output_altering_options) self.descriptor_handled_count[name_config] = \ self.descriptor_count - len(unprocessed) for dom, uuid, sel in unprocessed: self.targeted_descriptor("storage", dom, uuid, sel, [agent_name], False) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='s', out_signature='') def unregister(self, agent_id): log.info("Agent %s has unregistered", agent_id) agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] name_config = (agent_name, options) self.uniq_conf_clients[name_config].remove(agent_id) if len(self.uniq_conf_clients[name_config]) == 0: del self.descriptor_handled_count[name_config] del self.clients[agent_id] self.check_idle() if self.exiting: if len(self.clients) == 0: log.info("Exiting - no agents are running") self.mainloop.quit() else: log.info("Expecting %u more agents to exit (ex. %s)", len(self.clients), self.clients.keys()[0]) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ssss', out_signature='b') def lock(self, agent_id, lockid, desc_domain, selector): objpath = self.clients[agent_id] locks = self.locks[desc_domain] key = (lockid, selector) log.debug("LOCK:%s %s(%s) => %r %s:%s ", lockid, objpath, agent_id, key in locks, desc_domain, selector) if key in locks: return False locks.add(key) return True @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ssssbuu', out_signature='') def unlock(self, agent_id, lockid, desc_domain, selector, processing_failed, retries, wait_time): objpath = self.clients[agent_id] locks = self.locks[desc_domain] lkey = (lockid, selector) log.debug("UNLOCK:%s %s(%s) => %r %d:%d ", lockid, objpath, agent_id, processing_failed, retries, wait_time) if lkey not in locks: return locks.remove(lkey) # find agent_name, config_txt for (agent_name, config_txt), ids in self.uniq_conf_clients.items(): if agent_id in ids: break rkey = (agent_name, config_txt, desc_domain, selector) if rkey not in self.retry_counters: self.retry_counters[rkey] = retries if self.retry_counters[rkey] > 0: self.retry_counters[rkey] -= 1 desc = self.store.get_descriptor(desc_domain, selector) uuid = desc.uuid self.sched.add_action( wait_time, (agent_id, desc_domain, uuid, selector, agent_name)) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ss', out_signature='b') def push(self, agent_id, serialized_descriptor): descriptor = Descriptor.unserialize(serializer, str(serialized_descriptor)) desc_domain = str(descriptor.domain) uuid = str(descriptor.uuid) selector = str(descriptor.selector) if self.store.add(descriptor): self.descriptor_count += 1 log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector) if not self.exiting: self.new_descriptor(agent_id, desc_domain, uuid, selector) # useful in case all agents are in idle/interactive mode self.check_idle() return True else: log.debug("PUSH: %s already seen => %s:%s", agent_id, desc_domain, selector) return False @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='s') def get(self, agent_id, desc_domain, selector): log.debug("GET: %s %s:%s", agent_id, desc_domain, selector) desc = self.store.get_descriptor(str(desc_domain), str(selector)) if desc is None: return "" return desc.serialize_meta(serializer) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='s') def get_value(self, agent_id, desc_domain, selector): log.debug("GETVALUE: %s %s:%s", agent_id, desc_domain, selector) value = self.store.get_value(str(desc_domain), str(selector)) if value is None: return "" return serializer.dumps(value) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ss', out_signature='a{ss}') def list_uuids(self, agent_id, desc_domain): log.debug("LISTUUIDS: %s %s", agent_id, desc_domain) return self.store.list_uuids(str(desc_domain)) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sssuu', out_signature='as') def find(self, agent_id, desc_domain, selector_regex, limit=0, offset=0): log.debug("FIND: %s %s:%s (max %d skip %d)", agent_id, desc_domain, selector_regex, limit, offset) selectors = self.store.find(str(desc_domain), str(selector_regex), str(limit), int(offset)) return [str(s) for s in selectors] @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sssuu', out_signature='as') def find_by_selector(self, agent_id, desc_domain, selector_prefix, limit=0, offset=0): log.debug("FINDBYVALUE: %s %s %s (max %d skip %d)", agent_id, desc_domain, selector_prefix, limit, offset) descs = self.store.find_by_selector(str(desc_domain), str(selector_prefix), int(limit), int(offset)) return [desc.serialize_meta(serializer) for desc in descs] @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='as') def find_by_uuid(self, agent_id, desc_domain, uuid): log.debug("FINDBYUUID: %s %s:%s", agent_id, desc_domain, uuid) descs = self.store.find_by_uuid(str(desc_domain), str(uuid)) return [desc.serialize_meta(serializer) for desc in descs] @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ssss', out_signature='as') def find_by_value(self, agent_id, desc_domain, selector_prefix, value_regex): log.debug("FINDBYVALUE: %s %s %s %s", agent_id, desc_domain, selector_prefix, value_regex) descs = self.store.find_by_value(str(desc_domain), str(selector_prefix), str(value_regex)) return [desc.serialize_meta(serializer) for desc in descs] @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='') def mark_processed(self, agent_id, desc_domain, selector): agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSED: %s:%s %s %s", desc_domain, selector, agent_id, options) isnew = self.store.mark_processed(str(desc_domain), str(selector), agent_name, str(options)) if isnew: self.update_check_idle(agent_name, options) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='') def mark_processable(self, agent_id, desc_domain, selector): agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSABLE: %s:%s %s %s", desc_domain, selector, agent_id, options) isnew = self.store.mark_processable(str(desc_domain), str(selector), agent_name, str(options)) if isnew: self.update_check_idle(agent_name, options) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sss', out_signature='aas') def get_processable(self, agent_id, desc_domain, selector): log.debug("GET_PROCESSABLE: %s:%s %s", desc_domain, selector, agent_id) return self.store.get_processable(str(desc_domain), str(selector)) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='', out_signature='a{su}') def list_agents(self, agent_id): log.debug("LIST_AGENTS: %s", agent_id) #: maps agent name to number of instances of this agent counts = dict( Counter( objpath.rsplit('/', 1)[1] for objpath in self.clients.values())) return counts @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ss', out_signature='a(su)u') def processed_stats(self, agent_id, desc_domain): log.debug("PROCESSED_STATS: %s %s", agent_id, desc_domain) return self.store.processed_stats(str(desc_domain)) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sssb', out_signature='as') def get_children(self, agent_id, desc_domain, selector, recurse): log.debug("GET_CHILDREN: %s %s:%s", agent_id, desc_domain, selector) descs = self.store.get_children(str(desc_domain), str(selector), recurse=bool(recurse)) return [desc.serialize_meta(serializer) for desc in descs] @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='ss', out_signature='') def store_internal_state(self, agent_id, state): agent_name = self.agentnames[str(agent_id)] log.debug("STORE_INTSTATE: %s", agent_name) if self.store.STORES_INTSTATE: self.store.store_agent_state(agent_name, str(state)) @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='s', out_signature='s') def load_internal_state(self, agent_id): agent_name = self.agentnames[str(agent_id)] log.debug("LOAD_INTSTATE: %s", agent_name) if self.store.STORES_INTSTATE: return self.store.load_agent_state(agent_name) return "" @dbus.service.method(dbus_interface='com.airbus.rebus.bus', in_signature='sssas', out_signature='') def request_processing(self, agent_id, desc_domain, selector, targets): log.debug("REQUEST_PROCESSING: %s %s:%s targets %s", agent_id, desc_domain, selector, [str(t) for t in targets]) d = self.store.get_descriptor(str(desc_domain), str(selector)) self.userrequestid += 1 self.targeted_descriptor(agent_id, desc_domain, d.uuid, selector, targets, self.userrequestid) @dbus.service.signal(dbus_interface='com.airbus.rebus.bus', signature='ssss') def new_descriptor(self, sender_id, desc_domain, uuid, selector): pass @dbus.service.signal(dbus_interface='com.airbus.rebus.bus', signature='ssssasb') def targeted_descriptor(self, sender_id, desc_domain, uuid, selector, targets, user_request): """ Signal sent when a descriptor is sent to some target agents (not broadcast). Useful for: * Forcefully replaying a descriptor (debug purposes, or user request) * Feeding descriptors to a new agent. Used when resuming the bus. * Interactive mode - user may choose which selectors get send to each agent :param sender_id: sender id :param desc_domain: descriptor domain :param uuid: descriptor uuid :param selector: descriptor selector :param targets: list of target agent names. Agents not in this list should ignore this descriptor. :param user_request: True if this is a user request targeting agents running in interactive mode. """ pass @dbus.service.signal(dbus_interface='com.airbus.rebus.bus', signature='b') def bus_exit(self, awaiting_internal_state): """ Signal sent when the bus is exiting. :param awaiting_internal_state: indicates whether agents must send their internal serialized state for storage. """ self.exiting = True return @dbus.service.signal(dbus_interface='com.airbus.rebus.bus', signature='') def on_idle(self): """ Signal sent when the bus is idle, i.e. all descriptors have been marked as processed or processable by agents. """ pass @classmethod def run(cls, store, master_options): gobject.threads_init() dbus.glib.init_threads() DBusGMainLoop(set_as_default=True) bus = dbus.SessionBus() name = dbus.service.BusName("com.airbus.rebus.bus", bus) svc = cls(bus, "/bus", store) svc.mainloop = gobject.MainLoop() log.info("Entering main loop.") try: svc.mainloop.run() except (KeyboardInterrupt, SystemExit): if len(svc.clients) > 0: log.info("Trying to stop all agents properly. Press Ctrl-C " "again to stop.") # stop scheduler svc.sched.shutdown() # ask slave agents to shutdown nicely & save internal state log.info("Expecting %u more agents to exit (ex. %s)", len(svc.clients), svc.clients.keys()[0]) svc.bus_exit(store.STORES_INTSTATE) store.store_state() try: svc.mainloop.run() except (KeyboardInterrupt, SystemExit): if len(svc.clients) > 0: log.info( "Not all agents have stopped, exiting nonetheless") log.info("Stopping storage...") store.store_state() @staticmethod def sigterm_handler(sig, frame): # Try to exit cleanly the first time; if that does not work, exit. # raises SystemExit, caught in run() sys.exit(0) @staticmethod def add_arguments(subparser): # TODO allow specifying dbus address? Currently specified by local dbus # configuration file or environment variable pass def busthread_call(self, method, *args): gobject.idle_add(method, *args) def _sched_inject(self, agent_id, desc_domain, uuid, selector, target): """ Called by Sched object, from Timer thread. Emits targeted_descriptor through bus thread. """ self.busthread_call( self.targeted_descriptor, *(agent_id, desc_domain, uuid, selector, [target], False))
def __init__(self, store, server_addr, heartbeat_interval=0): self.store = store #: maps agent_id (ex. inject-:1.234) to object path (ex: /agent/inject) self.clients = {} self.exiting = False #: locks[domain] is a set of (lockid, selector) whose processing #: has started (might even be finished). Allows several agents that #: perform the same stateless computation to run in parallel self.locks = defaultdict(set) signal.signal(signal.SIGTERM, self._sigterm_handler) #: maps agent_id to agent name self.agentnames = {} #: maps agent_id to agent's serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agent_id to agent's serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: number of descriptors self.descriptor_count = 0 #: count descriptors marked as processed/processable by each uniquely #: configured agent self.descriptor_handled_count = {} #: uniq_conf_clients[(agent_name, config_txt)] = [agent_id, ...] self.uniq_conf_clients = defaultdict(list) #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject) #: last published agent id self.last_published_id = 0 #: bus session id, to make sure agents were not registered to another #: bus master (ex. which has exited) self.session_id = os.urandom(5).encode('hex') # Connects to the rabbitmq server self.server_addr = ( server_addr + "/%2F?connection_attempts=200&heartbeat_interval=" + str(heartbeat_interval)) self.params = pika.URLParameters(self.server_addr) b = False while not b: try: self.connection = pika.BlockingConnection(self.params) b = True except pika.exceptions.ConnectionClosed: log.warning("Cannot connect to rabbitmq at: %s. Retrying...", self.server_addr) time.sleep(0.5) self.channel = self.connection.channel() # Create the registration queue self.channel.queue_declare(queue="registration_queue") self.channel.queue_purge(queue="registration_queue") # Create the exchange for signals publish(master)/subscribe(slave) self.signal_exchange = self.channel.exchange_declare( exchange='rebus_signals', exchange_type='fanout') # Create the rpc queue self.channel.queue_declare(queue='rebus_master_rpc_highprio') self.channel.queue_purge(queue='rebus_master_rpc_highprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_highprio', arguments={'x-priority': 1}) self.channel.queue_declare(queue='rebus_master_rpc_lowprio') self.channel.queue_purge(queue='rebus_master_rpc_lowprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_lowprio', arguments={'x-priority': 0}) # bus is now ready to serve requests, publish registration IDs self._publish_ids(10000)
class RabbitBusMaster(BusMaster): _name_ = "rabbit" _desc_ = "Use RabbitMQ to exchange messages" def __init__(self, store, server_addr, heartbeat_interval=0): self.store = store #: maps agent_id (ex. inject-:1.234) to object path (ex: /agent/inject) self.clients = {} self.exiting = False #: locks[domain] is a set of (lockid, selector) whose processing #: has started (might even be finished). Allows several agents that #: perform the same stateless computation to run in parallel self.locks = defaultdict(set) signal.signal(signal.SIGTERM, self._sigterm_handler) #: maps agent_id to agent name self.agentnames = {} #: maps agent_id to agent's serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agent_id to agent's serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: number of descriptors self.descriptor_count = 0 #: count descriptors marked as processed/processable by each uniquely #: configured agent self.descriptor_handled_count = {} #: uniq_conf_clients[(agent_name, config_txt)] = [agent_id, ...] self.uniq_conf_clients = defaultdict(list) #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject) #: last published agent id self.last_published_id = 0 #: bus session id, to make sure agents were not registered to another #: bus master (ex. which has exited) self.session_id = os.urandom(5).encode('hex') # Connects to the rabbitmq server self.server_addr = ( server_addr + "/%2F?connection_attempts=200&heartbeat_interval=" + str(heartbeat_interval)) self.params = pika.URLParameters(self.server_addr) b = False while not b: try: self.connection = pika.BlockingConnection(self.params) b = True except pika.exceptions.ConnectionClosed: log.warning("Cannot connect to rabbitmq at: %s. Retrying...", self.server_addr) time.sleep(0.5) self.channel = self.connection.channel() # Create the registration queue self.channel.queue_declare(queue="registration_queue") self.channel.queue_purge(queue="registration_queue") # Create the exchange for signals publish(master)/subscribe(slave) self.signal_exchange = self.channel.exchange_declare( exchange='rebus_signals', exchange_type='fanout') # Create the rpc queue self.channel.queue_declare(queue='rebus_master_rpc_highprio') self.channel.queue_purge(queue='rebus_master_rpc_highprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_highprio', arguments={'x-priority': 1}) self.channel.queue_declare(queue='rebus_master_rpc_lowprio') self.channel.queue_purge(queue='rebus_master_rpc_lowprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_lowprio', arguments={'x-priority': 0}) # bus is now ready to serve requests, publish registration IDs self._publish_ids(10000) def _publish_ids(self, amount): for i in range(self.last_published_id, self.last_published_id + amount): new_id = "%s-%d" % (self.session_id, i) self.channel.basic_publish(exchange="", routing_key="registration_queue", body=new_id, properties=pika.BasicProperties( delivery_mode=2, )) self.last_published_id += amount def _check_agent_id(self, agent_id): """ Checks agent_id prefix """ if self.session_id not in agent_id: log.warning( "Received method call from agent %s which is not registered " "to this Bus Master session (stale agent?).", agent_id) return False return True def _send_signal(self, signal_name, args): # Send a signal on the exchange body = {'signal_name': signal_name, 'args': args} body = serializer.dumps(body) b = False while not b: try: self.channel.basic_publish(exchange='rebus_signals', routing_key='', body=body, properties=pika.BasicProperties( delivery_mode=2, )) b = True except pika.exceptions.ConnectionClosed: log.info("Disconnected (in _send_signal). " "Trying to reconnect...") self._reconnect() time.sleep(0.5) # TODO Check is the key is valid def _call_rpc_func(self, name, args): f = { 'register': self.register, 'unregister': self.unregister, 'lock': self.lock, 'unlock': self.unlock, 'push': self.push, 'get': self.get, 'get_value': self.get_value, 'list_uuids': self.list_uuids, 'find': self.find, 'find_by_uuid': self.find_by_uuid, 'find_by_selector': self.find_by_selector, 'find_by_value': self.find_by_value, 'mark_processed': self.mark_processed, 'mark_processable': self.mark_processable, 'get_processable': self.get_processable, 'list_agents': self.list_agents, 'processed_stats': self.processed_stats, 'get_children': self.get_children, 'store_internal_state': self.store_internal_state, 'load_internal_state': self.load_internal_state, 'request_processing': self.request_processing, } return f[name](**args) def _rpc_callback(self, ch, method, properties, body): # Parse the rpc request body = serializer.loads(body) func_name = body['func_name'] args = body['args'] # Call the function ret = self._call_rpc_func(func_name, args) ret = serializer.dumps(ret) # Push the result of the function on the return queue b = False while not b: try: retpublish = ch.basic_publish( exchange='', routing_key=properties.reply_to, body=ret, properties=pika.BasicProperties( correlation_id=properties.correlation_id)) b = True except pika.exceptions.ConnectionClosed: log.info( "Disconnected (in _rpc_callback). Trying to reconnect") self._reconnect() ch.basic_ack(delivery_tag=method.delivery_tag) def _update_check_idle(self, agent_name, output_altering_options): """ Increases the count of handled descriptors and checks if all descriptors have been handled (processed/marked as processable). In that case, send the "on_idle" message. """ name_config = (agent_name, output_altering_options) self.descriptor_handled_count[name_config] += 1 self._check_idle() def _check_idle(self): if self.exiting: return # Check if we have reached idle state nbdistinctagents = len(self.descriptor_handled_count) nbhandlings = sum(self.descriptor_handled_count.values()) if self.descriptor_count * nbdistinctagents == nbhandlings: log.debug( "IDLE: %d agents having distinct (name, config) %d " "descriptors %d handled", nbdistinctagents, self.descriptor_count, nbhandlings) self._on_idle() def register(self, agent_id, agent_domain, pth, config_txt, processes_descriptors): if not self._check_agent_id(agent_id): return if not format_check.is_valid_domain(agent_domain): return # replenish id queue self._publish_ids(1) #: indicates whether another instance of the same agent is already #: running with the same configuration agent_name = agent_id.split('-', 1)[0] self.agentnames[agent_id] = agent_name output_altering_options = get_output_altering_options(str(config_txt)) name_config = (agent_name, output_altering_options) already_running = len(self.uniq_conf_clients[name_config]) > 1 self.uniq_conf_clients[name_config].append(agent_id) self.clients[agent_id] = pth self.agents_output_altering_options[agent_id] = output_altering_options self.agents_full_config_txts[agent_id] = str(config_txt) log.info("New client %s (%s) in domain %s with config %s", pth, agent_id, agent_domain, config_txt) # Send not-yet processed descriptors to the agent... if not processes_descriptors: self.descriptor_handled_count[name_config] = 0 elif not already_running: # ...unless another instance of the same agent has already been # started, and should be processing those descriptors unprocessed = \ self.store.list_unprocessed_by_agent(agent_name, output_altering_options) self.descriptor_handled_count[name_config] = \ self.descriptor_count - len(unprocessed) for dom, uuid, sel in unprocessed: self._targeted_descriptor("storage", dom, uuid, sel, [agent_name], False) def unregister(self, agent_id): log.info("Agent %s has unregistered", agent_id) if not self._check_agent_id(agent_id): return agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] name_config = (agent_name, options) self.uniq_conf_clients[name_config].remove(agent_id) if len(self.uniq_conf_clients[name_config]) == 0: del self.descriptor_handled_count[name_config] del self.clients[agent_id] self._check_idle() if self.exiting: if len(self.clients) == 0: log.info("Exiting - no agents are running") self.channel.stop_consuming() else: log.info("Expecting %u more agents to exit (ex. %s)", len(self.clients), self.clients.keys()[0]) def lock(self, agent_id, lockid, desc_domain, selector): if not self._check_agent_id(agent_id): return False if not format_check.is_valid_domain(desc_domain): return False if not format_check.is_valid_fullselector(selector): return False objpath = self.clients[agent_id] locks = self.locks[desc_domain] key = (lockid, selector) log.debug("LOCK:%s %s(%s) => %r %s:%s ", lockid, objpath, agent_id, key in locks, desc_domain, selector) if key in locks: return False locks.add(key) return True def unlock(self, agent_id, lockid, desc_domain, selector, processing_failed, retries, wait_time): if not self._check_agent_id(agent_id): return if not format_check.is_valid_domain(desc_domain): return if not format_check.is_valid_fullselector(selector): return objpath = self.clients[agent_id] locks = self.locks[desc_domain] lkey = (lockid, selector) log.debug("UNLOCK:%s %s(%s) => %r %d:%d ", lockid, objpath, agent_id, processing_failed, retries, wait_time) if lkey not in locks: return locks.remove(lkey) # find agent_name, config_txt for (agent_name, config_txt), ids in self.uniq_conf_clients.items(): if agent_id in ids: break rkey = (agent_name, config_txt, desc_domain, selector) if rkey not in self.retry_counters: self.retry_counters[rkey] = retries if self.retry_counters[rkey] > 0: self.retry_counters[rkey] -= 1 desc = self.store.get_descriptor(desc_domain, selector) uuid = desc.uuid self.sched.add_action( wait_time, (agent_id, desc_domain, uuid, selector, agent_name)) def push(self, agent_id, serialized_descriptor): if not self._check_agent_id(agent_id): return False descriptor = Descriptor.unserialize(serializer, str(serialized_descriptor)) desc_domain = str(descriptor.domain) uuid = str(descriptor.uuid) selector = str(descriptor.selector) # ensure processing terminates if not format_check.processing_depth(self.store, descriptor): log.warning( "Refusing descriptor %s:%s: loop or >2 ancestors " "having the same descriptor, or invalid precursor", desc_domain, selector) return False if self.store.add(descriptor): self.descriptor_count += 1 log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector) if not self.exiting: self._new_descriptor(agent_id, desc_domain, uuid, selector) # useful in case all agents are in idle/interactive mode self._check_idle() return True else: log.debug("PUSH: %s already seen => %s:%s", agent_id, desc_domain, selector) return False def get(self, agent_id, desc_domain, selector): log.debug("GET: %s %s:%s", agent_id, desc_domain, selector) if not self._check_agent_id(agent_id): return None if not format_check.is_valid_domain(desc_domain): return None if not format_check.is_valid_selector(selector): return None desc = self.store.get_descriptor(str(desc_domain), str(selector)) if desc is None: return "" return desc.serialize_meta(serializer) def get_value(self, agent_id, desc_domain, selector): log.debug("GETVALUE: %s %s:%s", agent_id, desc_domain, selector) if not self._check_agent_id(agent_id): return None if not format_check.is_valid_domain(desc_domain): return None if not format_check.is_valid_selector(selector): return None value = self.store.get_value(str(desc_domain), str(selector)) if value is None: return "" return serializer.dumps(value) def list_uuids(self, agent_id, desc_domain): log.debug("LISTUUIDS: %s %s", agent_id, desc_domain) if not self._check_agent_id(agent_id): return {} if not format_check.is_valid_domain(desc_domain): return {} return self.store.list_uuids(str(desc_domain)) def find(self, agent_id, desc_domain, selector_regex, limit=0, offset=0): log.debug("FIND: %s %s:%s (max %d skip %d)", agent_id, desc_domain, selector_regex, limit, offset) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] return self.store.find(str(desc_domain), str(selector_regex), int(limit), int(offset)) def find_by_selector(self, agent_id, desc_domain, selector_prefix, limit=0, offset=0): log.debug("FINDBYSELECTOR: %s %s %s (max %d skip %d)", agent_id, desc_domain, selector_prefix, limit, offset) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] descs = self.store.find_by_selector(str(desc_domain), str(selector_prefix), int(limit), int(offset)) return [desc.serialize_meta(serializer) for desc in descs] def find_by_uuid(self, agent_id, desc_domain, uuid): log.debug("FINDBYUUID: %s %s:%s", agent_id, desc_domain, uuid) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] descs = self.store.find_by_uuid(str(desc_domain), str(uuid)) return [desc.serialize_meta(serializer) for desc in descs] def find_by_value(self, agent_id, desc_domain, selector_prefix, value_regex): log.debug("FINDBYVALUE: %s %s %s %s", agent_id, desc_domain, selector_prefix, value_regex) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] descs = self.store.find_by_value(str(desc_domain), str(selector_prefix), str(value_regex)) return [desc.serialize_meta(serializer) for desc in descs] def mark_processed(self, agent_id, desc_domain, selector): if not self._check_agent_id(agent_id): return if not format_check.is_valid_domain(desc_domain): return if not format_check.is_valid_fullselector(selector): return agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSED: %s:%s %s %s", desc_domain, selector, agent_id, options) isnew = self.store.mark_processed(str(desc_domain), str(selector), agent_name, str(options)) if isnew: self._update_check_idle(agent_name, options) def mark_processable(self, agent_id, desc_domain, selector): if not self._check_agent_id(agent_id): return if not format_check.is_valid_domain(desc_domain): return if not format_check.is_valid_fullselector(selector): return agent_name = self.agentnames[agent_id] options = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSABLE: %s:%s %s %s", desc_domain, selector, agent_id, options) isnew = self.store.mark_processable(str(desc_domain), str(selector), agent_name, str(options)) if isnew: self._update_check_idle(agent_name, options) def get_processable(self, agent_id, desc_domain, selector): log.debug("GET_PROCESSABLE: %s:%s %s", desc_domain, selector, agent_id) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] if not format_check.is_valid_fullselector(selector): return [] return self.store.get_processable(str(desc_domain), str(selector)) def list_agents(self, agent_id): log.debug("LIST_AGENTS: %s", agent_id) if not self._check_agent_id(agent_id): return {} #: maps agent name to number of instances of this agent counts = dict( Counter( objpath.rsplit('/', 1)[1] for objpath in self.clients.values())) return counts def processed_stats(self, agent_id, desc_domain): log.debug("PROCESSED_STATS: %s %s", agent_id, desc_domain) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] return self.store.processed_stats(str(desc_domain)) def get_children(self, agent_id, desc_domain, selector, recurse): log.debug("GET_CHILDREN: %s %s:%s", agent_id, desc_domain, selector) if not self._check_agent_id(agent_id): return [] if not format_check.is_valid_domain(desc_domain): return [] if not format_check.is_valid_fullselector(selector): return [] return list( self.store.get_children(str(desc_domain), str(selector), serializer=serializer, recurse=bool(recurse))) def store_internal_state(self, agent_id, state): if not self._check_agent_id(agent_id): return agent_name = self.agentnames[str(agent_id)] log.debug("STORE_INTSTATE: %s", agent_name) if self.store.STORES_INTSTATE: self.store.store_agent_state(agent_name, str(state)) def load_internal_state(self, agent_id): if not self._check_agent_id(agent_id): return "" agent_name = self.agentnames[str(agent_id)] log.debug("LOAD_INTSTATE: %s", agent_name) if self.store.STORES_INTSTATE: return self.store.load_agent_state(agent_name) return "" def request_processing(self, agent_id, desc_domain, selector, targets): log.debug("REQUEST_PROCESSING: %s %s:%s targets %s", agent_id, desc_domain, selector, [str(t) for t in targets]) if not self._check_agent_id(agent_id): return if not format_check.is_valid_domain(desc_domain): return if not format_check.is_valid_fullselector(selector): return d = self.store.get_descriptor(str(desc_domain), str(selector)) self.userrequestid += 1 self._targeted_descriptor(agent_id, desc_domain, d.uuid, selector, targets, self.userrequestid) def _new_descriptor(self, sender_id, desc_domain, uuid, selector): args = locals() args.pop('self', None) self._send_signal("new_descriptor", args) def _targeted_descriptor(self, sender_id, desc_domain, uuid, selector, targets, user_request): """ Signal sent when a descriptor is sent to some target agents (not broadcast). Useful for: * Forcefully replaying a descriptor (debug purposes, or user request) * Feeding descriptors to a new agent. Used when resuming the bus. * Interactive mode - user may choose which selectors get send to each agent :param sender_id: sender id :param desc_domain: descriptor domain :param uuid: descriptor uuid :param selector: descriptor selector :param targets: list of target agent names. Agents not in this list should ignore this descriptor. :param user_request: True if this is a user request targeting agents running in interactive mode. """ args = locals() args.pop('self', None) self._send_signal("targeted_descriptor", args) def _bus_exit(self, awaiting_internal_state): """ Signal sent when the bus is exiting. :param awaiting_internal_state: indicates whether agents must send their internal serialized state for storage. """ args = locals() args.pop('self', None) self._send_signal("bus_exit", args) self.exiting = True return def _on_idle(self): """ Signal sent when the bus is idle, i.e. all descriptors have been marked as processed or processable by agents. """ args = locals() args.pop('self', None) self._send_signal("on_idle", args) def _reconnect(self): b = False while not b: try: log.info("Re-connecting to rabbitmq server at: " + str(self.server_addr)) self.connection = pika.BlockingConnection(self.params) self.channel = self.connection.channel() self.channel.queue_declare(queue="registration_queue") self.signal_exchange = self.channel.exchange_declare( exchange='rebus_signals', exchange_type='fanout') self.channel.queue_declare(queue='rebus_master_rpc_highprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_highprio', arguments={'x-priority': 1}) self.channel.queue_declare(queue='rebus_master_rpc_lowprio') self.channel.basic_consume(self._rpc_callback, queue='rebus_master_rpc_lowprio', arguments={'x-priority': 0}) b = True except pika.exceptions.ConnectionClosed: log.info("Failed to reconnect to RabbitMQ. Retrying..") time.sleep(0.5) @classmethod def run(cls, store, master_options): server_addr = master_options.rabbitaddr heartbeat_interval = master_options.heartbeat svc = cls(store, server_addr, heartbeat_interval) log.info("Entering main loop.") try: while True: try: svc.channel.start_consuming() except pika.exceptions.ConnectionClosed: log.info("Disconnected (in run). Trying to reconnect") cls._reconnect() except (KeyboardInterrupt, SystemExit): log.info("Received SIGINT or Ctrl-C, exiting") svc.channel.queue_delete(queue='registration_queue') if len(svc.clients) > 0: log.info("Trying to stop all agents properly. Press Ctrl-C " "again to stop.") # stop scheduler svc.sched.shutdown() # ask slave agents to shutdown nicely & save internal state log.info("Expecting %u more agents to exit (ex. %s)", len(svc.clients), svc.clients.keys()[0]) svc._bus_exit(store.STORES_INTSTATE) store.store_state() try: while True: try: svc.channel.start_consuming() if len(svc.clients) == 0: break except pika.exceptions.ConnectionClosed: log.info("Disconnected. Trying to reconnect") cls._reconnect() except (KeyboardInterrupt, SystemExit): if len(svc.clients) > 0: log.info( "Not all agents have stopped, exiting nonetheless") svc.channel.cancel() svc.channel.close() svc.connection.close() log.info("Stopping storage...") store.store_state() @staticmethod def _sigterm_handler(sig, frame): # Try to exit cleanly the first time; if that does not work, exit. # raises SystemExit, caught in run() sys.exit(0) @staticmethod def add_arguments(subparser): subparser.add_argument( "--rabbitaddr", default="amqp://localhost", help="URL prefix (scheme+authority) of the rabbitmq server") subparser.add_argument("--heartbeat", help="Rabbitmq heartbeat interval, in seconds", default=0) def _busthread_call(self, method, *args): f = lambda: method(*args) self.connection.add_timeout(0, f) def _sched_inject(self, agent_id, desc_domain, uuid, selector, target): """ Called by Sched object, from Timer thread. Emits targeted_descriptor through bus thread. """ self._busthread_call( self._targeted_descriptor, *(agent_id, desc_domain, uuid, selector, [target], False))
class LocalBus(Bus): _name_ = "localbus" def __init__(self, options): Bus.__init__(self) #: stores currently held locks [(lockid, domain, selector)] self.locks = defaultdict(set) #: Next available agent id. Never decreases. self.agent_count = 0 self.store = RAMStorage() # TODO add support for DiskStorage ? # TODO save internal state at bus exit (only useful with DiskStorage) #: maps agentid (ex. inject-12) to agentdesc self.agent_descs = {} #: maps agentid to agent instance self.agents = {} self.threads = [] #: maps agentids to their serialized configuration - output altering #: options only self.agents_output_altering_options = {} #: maps agentids to their serialized configuration self.agents_full_config_txts = {} #: monotonically increasing user request counter self.userrequestid = 0 #: retry_counters[(agent_name, config_txt, domain, selector)] = \ #: number of remaining retries self.retry_counters = defaultdict(dict) self.sched = Sched(self._sched_inject) def join(self, agent, agent_domain=DEFAULT_DOMAIN): agid = "%s-%i" % (agent.name, self.agent_count) self.agent_count += 1 self.agents_full_config_txts[agid] = agent.config_txt self.agents_output_altering_options[agid] = \ get_output_altering_options(agent.config_txt) self.agent_descs[agid] = agent_desc(agid, agent_domain) self.agents[agid] = agent return agid def lock(self, agent_id, lockid, desc_domain, selector): key = (lockid, desc_domain, selector) log.info("LOCK:%s %s => %r %s:%s", lockid, agent_id, key in self.locks[desc_domain], desc_domain, selector) if key in self.locks[desc_domain]: return False self.locks[desc_domain].add(key) return True def unlock(self, agent_id, lockid, desc_domain, selector, processing_failed, retries, wait_time): lkey = (lockid, desc_domain, selector) log.info("UNLOCK:%s %s => %r %s:%s", lockid, agent_id, lkey in self.locks[desc_domain], desc_domain, selector) if lkey not in self.locks[desc_domain]: return self.locks[desc_domain].remove(lkey) agent_name = self.agents[agent_id].name config_txt = self.agents_output_altering_options[agent_id] rkey = (agent_name, config_txt, desc_domain, selector) if rkey not in self.retry_counters: self.retry_counters[rkey] = retries if self.retry_counters[rkey] > 0: self.retry_counters[rkey] -= 1 desc = self.store.get_descriptor(desc_domain, selector) uuid = desc.uuid self.sched.add_action( wait_time, (agent_id, desc_domain, uuid, selector, agent_name)) def push(self, agent_id, descriptor): desc_domain = descriptor.domain selector = descriptor.selector if self.store.add(descriptor): log.info("PUSH: %s => %s:%s", agent_id, desc_domain, selector) for agid in self.agents: try: log.debug("Calling %s's on_new_descriptor", agid) self.agents[agid].on_new_descriptor( agent_id, desc_domain, descriptor.uuid, selector, 0) except Exception as e: log.error("ERROR agent [%s]: %s", agid, e, exc_info=1) else: log.info("PUSH: %s already seen => %s:%s", agent_id, desc_domain, selector) def get(self, agent_id, desc_domain, selector): log.info("GET: %s %s:%s", agent_id, desc_domain, selector) return self.store.get_descriptor(desc_domain, selector) def get_value(self, agent_id, desc_domain, selector): log.info("GET: %s %s:%s", agent_id, desc_domain, selector) return self.store.get_value(desc_domain, selector) def list_uuids(self, agent_id, desc_domain): log.debug("LISTUUIDS: %s %s", agent_id, desc_domain) return self.store.list_uuids(desc_domain) def find(self, agent_id, desc_domain, selector_regex, limit=0, offset=0): log.debug("FIND: %s %s:%s (max %d skip %d)", agent_id, desc_domain, selector_regex, limit, offset) return self.store.find(desc_domain, selector_regex, limit, offset) def find_by_selector(self, agent_id, desc_domain, selector_prefix, limit=0, offset=0): log.debug("FINDBYVALUE: %s %s %s (max %d skip %d)", agent_id, desc_domain, selector_prefix, limit, offset) return self.store.find_by_selector(desc_domain, selector_prefix, limit, offset) def find_by_uuid(self, agent_id, desc_domain, uuid): log.debug("FINDBYUUID: %s %s:%s", agent_id, desc_domain, uuid) return self.store.find_by_uuid(desc_domain, uuid) def find_by_value(self, agent_id, desc_domain, selector_prefix, value_regex): log.debug("FINDBYVALUE: %s %s %s %s", agent_id, desc_domain, selector_prefix, value_regex) return self.store.find_by_value(desc_domain, selector_prefix, value_regex) def mark_processed(self, agent_id, desc_domain, selector): agent_name = self.agents[agent_id].name config_txt = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSED: %s:%s %s %s", desc_domain, selector, agent_id, config_txt) self.store.mark_processed(desc_domain, selector, agent_name, config_txt) def mark_processable(self, agent_id, desc_domain, selector): agent_name = self.agents[agent_id].name config_txt = self.agents_output_altering_options[agent_id] log.debug("MARK_PROCESSABLE: %s:%s %s %s", desc_domain, selector, agent_id, config_txt) self.store.mark_processable(desc_domain, selector, agent_name, config_txt) def get_processable(self, agent_id, desc_domain, selector): log.debug("GET_PROCESSABLE: %s:%s %s", desc_domain, selector, agent_id) return self.store.get_processable(desc_domain, selector) def list_agents(self, agent_id): log.debug("LIST_AGENTS: %s", agent_id) return dict(Counter(i.rsplit('-', 1)[0] for i in self.agent_descs)) def processed_stats(self, agent_id, desc_domain): log.debug("PROCESSED_STATS: %s %s", agent_id, desc_domain) return self.store.processed_stats(desc_domain) def get_children(self, agent_id, desc_domain, selector, recurse=True): log.info("GET_CHILDREN: %s %s:%s", agent_id, desc_domain, selector) return list(self.store.get_children(desc_domain, selector, recurse)) def store_internal_state(self, agent_id, state): log.debug("STORE_INTSTATE: %s", agent_id) if self.store.STORES_INTSTATE: agent_name = self.agents[agent_id].name self.store.store_agent_state(agent_name, str(state)) def load_internal_state(self, agent_id): log.debug("LOAD_INTSTATE: %s", agent_id) if self.store.STORES_INTSTATE: agent_name = self.agents[agent_id].name return self.store.load_agent_state(agent_name) return "" def request_processing(self, agent_id, desc_domain, selector, targets): log.debug("REQUEST_PROCESSING: %s %s:%s target %s", agent_id, desc_domain, selector, targets) self.userrequestid += 1 d = self.store.get_descriptor(desc_domain, selector) for agid in self.agents: if self.agents[agid].name in targets: try: log.debug( "Calling %s on_new_descriptor for user-requested" " processing", agid) self.agents[agid].on_new_descriptor( agent_id, desc_domain, d.uuid, selector, self.userrequestid) except Exception as e: log.error("ERROR agent [%s]: %s", agid, e, exc_info=1) def busthread_call(self, method, *params): # Caution - there are several bus threads with this mode - typically 1 # per inject thread. method(*params) def _sched_inject(self, agent_id, desc_domain, uuid, selector, target): """ Called by Sched object, from Timer thread. Emits targeted_descriptor through bus thread. """ self.busthread_call(self.agents[agent_id].on_new_descriptor, *(agent_id, desc_domain, uuid, selector, 0)) def run_agents(self): for agent in self.agents.values(): t = threading.Thread(target=agent.run_and_catch_exc) t.daemon = True t.start() self.threads.append(t) for t in self.threads: t.join() new_descs = True while new_descs: new_descs = False for agent in self.agents.values(): new_descs = new_descs or agent.on_idle()