def test_addresses_are_not_shared_accross_instances(self):
    """An agent registered through one discovery is unknown to another one."""
    layer_a = InProcessCommunicationLayer()
    layer_a.discovery = Discovery('a1', 'addr1')

    layer_b = InProcessCommunicationLayer()
    layer_b.discovery = Discovery('a2', 'addr2')

    # Only the discovery attached to the first layer is told about 'a1'.
    layer_a.discovery.register_agent('a1', layer_a)

    # The second discovery must not be able to resolve it.
    with pytest.raises(UnknownAgent):
        layer_b.discovery.agent_address('a1')
def http_comms():
    """
    Yield a pair of HTTP communication layers listening on localhost.

    Each layer gets its own `Discovery` and `Messaging`; `post_msg` of the
    second layer's messaging is replaced with a `MagicMock`.
    Both layers are shut down once the consumer resumes the generator.
    """
    addr_a1 = ('127.0.0.1', 10001)
    addr_a2 = ('127.0.0.1', 10002)

    comm1 = HttpCommunicationLayer(addr_a1)
    comm1.discovery = Discovery('a1', addr_a1)
    # NOTE(review): the Messaging instance is discarded here; presumably it
    # attaches itself to the layer (comm2.messaging is used below) - confirm.
    Messaging('a1', comm1)

    comm2 = HttpCommunicationLayer(addr_a2)
    comm2.discovery = Discovery('a2', addr_a2)
    Messaging('a2', comm2)
    comm2.messaging.post_msg = MagicMock()

    yield comm1, comm2

    # Teardown, in creation order.
    for layer in (comm1, comm2):
        layer.shutdown()
def test_computation_agent():
    """`computation_agent` returns the hosting agent, or raises if unknown."""
    disco = Discovery('test', 'addr_test')
    disco.register_agent('a1', 'addr1')
    for comp_name in ('c1', 'c2'):
        disco.register_computation(comp_name, 'a1')

    for comp_name in ('c1', 'c2'):
        assert disco.computation_agent(comp_name) == 'a1'

    # 'c3' was never registered.
    with pytest.raises(UnknownComputation):
        disco.computation_agent('c3')
def test_raise_when_sending_to_unknown_agent_fail_on_send(self):
    """With on_error='fail', sending to an unregistered agent raises."""
    sender_layer = InProcessCommunicationLayer()
    sender_layer.discovery = Discovery('a1', sender_layer)
    payload = ('c1', 'c2', 'msg')

    # 'a2' has never been registered in the discovery.
    with pytest.raises(UnknownAgent):
        sender_layer.send_msg('a1', 'a2', payload, on_error='fail')
def test_msg_to_another_agent(self):
    """A message sent to a registered agent reaches its `receive_msg`."""
    # Sender side
    sender_layer = InProcessCommunicationLayer()
    Messaging('a1', sender_layer)
    sender_layer.discovery = Discovery('a1', sender_layer)

    # Receiver side, with reception replaced by a mock we can inspect
    receiver_layer = InProcessCommunicationLayer()
    Messaging('a2', receiver_layer)
    receiver_layer.discovery = Discovery('a2', receiver_layer)
    receiver_layer.receive_msg = MagicMock()

    # Make the receiver known to the sender before sending.
    sender_layer.discovery.register_agent('a2', receiver_layer)

    payload = ('c1', 'c2', 'msg')
    sender_layer.send_msg('a1', 'a2', payload)

    receiver_layer.receive_msg.assert_called_with('a1', 'a2', payload)
def test_list_computations():
    """`computations()` lists every computation that has been registered."""
    expected = {'c1', 'c2'}

    disco = Discovery('test', 'addr_test')
    disco.register_agent('a1', 'addr1')
    disco.register_computation('c1', 'a1')
    disco.register_computation('c2', 'a1')

    listed = set(disco.computations())
    assert listed == expected
def test_retry_when_sending_to_unknown_agent_retry_on_send(self):
    """
    With on_error='retry', a message for an unknown agent is not sent at
    once but is delivered when that agent gets registered.
    """
    sender_layer = InProcessCommunicationLayer(None)
    sender_layer.discovery = Discovery('a1', sender_layer)
    payload = ('c1', 'c2', 'msg')

    # 'a2' is unknown at this point: send_msg reports a falsy result.
    sent = sender_layer.send_msg('a1', 'a2', payload, on_error='retry')
    assert not sent

    # Registering 'a2' afterwards must trigger the delivery.
    late_layer = create_autospec(InProcessCommunicationLayer)
    sender_layer.discovery.register_agent('a2', late_layer)

    late_layer.receive_msg.assert_called_with('a1', 'a2', payload)
def __init__(self, name, comm: CommunicationLayer,
             agent_def: AgentDef = None,
             ui_port: int = None,
             daemon: bool = False):
    """
    Initialise the agent's state; no thread is started here.

    Parameters
    ----------
    name: str
        name of the agent, also used to name its logger and its thread
    comm: CommunicationLayer
        object used to send and receive messages
    agent_def: AgentDef
        definition of this agent, optional
    ui_port: int
        port stored for the ui-server, optional
    daemon: boolean
        value assigned to the `daemon` flag of the agent's thread
    """
    # --- identity ---------------------------------------------------------
    self._name = name
    self.agent_def = agent_def
    self.logger = logging.getLogger('pydcop.agent.' + name)

    # --- communication and discovery ---------------------------------------
    # `_comm` must be assigned before `self.address` is read.
    self._comm = comm
    self.discovery = Discovery(self._name, self.address)
    self._comm.discovery = self.discovery
    self._messaging = Messaging(name, comm)

    # --- ui server (created later, if at all) ------------------------------
    self._ui_port = ui_port
    self._ui_server = None

    # --- thread and lifecycle flags ----------------------------------------
    self.t = Thread(target=self._run, name='thread_' + name)
    self.t.daemon = daemon
    self._stopping = threading.Event()
    self._running = False
    # `_idle`: all incoming messages have been handled
    self._idle = False

    # --- hosted computations ------------------------------------------------
    self._computations = {}  # type: Dict[str, MessagePassingComputation]
    # Names of paused computations: a computation listed here will not
    # receive any message.
    self.paused_computations = []

    # --- timing --------------------------------------------------------------
    self.t_active = 0
    # time when the first non-technical computation is run
    self._run_t = None
    # time when starting the agent
    self._start_t = None

    # --- periodic action -----------------------------------------------------
    self._periodic_cb = None
    self._period = 1000
def _removal_candidate_computation_info(
        orphan: str, departed: List[str], cg: ComputationGraph,
        discovery: Discovery) \
        -> Tuple[List[str], Dict[str, str], Dict[str, List[str]]]:
    """
    Gather what an agent needs to negotiate the hosting of `orphan`.

    :param orphan: the candidate computation that must be hosted
    :param departed: the agents that left the system
    :param cg: the computation graph
    :param discovery: the distribution of computations on agents

    :return: a triple (candidate_agents, fixed_neighbors,
      candidates_neighbors) where:

      * candidate_agents is the list of agents holding a replica of
        `orphan`, departed agents excluded
      * fixed_neighbors maps each neighbor of `orphan` that is not itself
        orphaned to the agent hosting it
      * candidates_neighbors maps each neighbor of `orphan` that is also
        orphaned to the list of agents holding one of its replicas,
        departed agents excluded
    """
    all_orphans = _removal_orphaned_computations(departed, discovery)

    surviving_hosts = discovery.replica_agents(orphan).difference(departed)
    candidate_agents = list(surviving_hosts)

    fixed_neighbors, candidates_neighbors = {}, {}
    for neighbor in cg.neighbors(orphan):
        # A computation is never treated as its own neighbor.
        if neighbor == orphan:
            continue

        if neighbor not in all_orphans:
            # Still hosted somewhere: its agent is known and fixed.
            fixed_neighbors[neighbor] = discovery.computation_agent(neighbor)
            continue

        # Orphaned as well: any surviving replica holder may host it.
        neighbor_hosts = \
            discovery.replica_agents(neighbor).difference(departed)
        candidates_neighbors[neighbor] = list(neighbor_hosts)

    return candidate_agents, fixed_neighbors, candidates_neighbors
def _removal_candidate_computations_for_agt(agt, orphaned_computations,
                                            discovery: Discovery):
    """
    Select the orphaned computations for which `agt` holds a replica.

    :param agt: name of the agent
    :param orphaned_computations: iterable of orphaned computation names
    :param discovery: used to look up the replica agents of each computation

    :return: the list of orphaned computations that could potentially be
      hosted on agt (because agt has their replica), in input order
    """
    return [
        orphan for orphan in orphaned_computations
        if agt in discovery.replica_agents(orphan)
    ]
def _removal_orphaned_computations(departed: List[str],
                                   discovery: Discovery) -> List[str]:
    """
    Build the list of computations orphaned when removing some agents.

    Parameters
    ----------
    departed: list of str
        list of agent's names
    discovery: a Discovery instance
        information about the current deployment of computations

    Returns
    -------
    list of str
        the computations reported by `discovery.agent_computations` for
        each departed agent, concatenated in the order of `departed`.
    """
    return [
        comp
        for gone_agent in departed
        for comp in discovery.agent_computations(gone_agent)
    ]
def _removal_candidate_agents(departed: List[str], discovery: Discovery) \
        -> List[str]:
    """
    List the agents taking part in the repair process.

    :param departed: a list of agents
    :param discovery: used to find orphaned computations and their replicas

    :return: the candidate agents, i.e. the agents holding a replica of at
      least one computation orphaned by the departed agents; departed agents
      themselves are excluded and each agent appears once.
    """
    replica_holders = []
    for orphan in _removal_orphaned_computations(departed, discovery):
        replica_holders.extend(discovery.replica_agents(orphan))

    # De-duplicate and drop the agents that left.
    remaining = set(replica_holders) - set(departed)
    return list(remaining)
def local_messaging():
    """Return a `Messaging` for agent 'a1' over an in-process layer."""
    layer = InProcessCommunicationLayer()
    layer.discovery = Discovery('a1', 'addr1')
    return Messaging('a1', layer)
def standalone_discovery():
    """Return a fresh `Discovery` named 'test' with address 'address'."""
    return Discovery('test', 'address')
class Agent(object):
    """
    Object representing an agent.

    An agent communicates with other agents through messages, using a
    `CommunicationLayer`.
    An agent hosts message passing computations and runs these computations
    on its own thread.

    Notes
    -----
    An agent does not necessarily need to know its own definition (see
    agent_def argument) but it needs it for some uses like replication in
    resilient DCOP.

    Parameters
    ----------
    name: str
        name of the agent
    comm: CommunicationLayer
        object used to send and receive messages
    agent_def: AgentDef
        definition of this agent, optional
    ui_port: int
        the port on which to run the ui-server. If not given, no ui-server is
        started.
    daemon: boolean
        indicates if the agent should use a daemon thread (defaults to False)

    See Also
    --------
    MessagePassingComputation, CommunicationLayer

    """

    def __init__(self, name, comm: CommunicationLayer,
                 agent_def: AgentDef = None,
                 ui_port: int = None,
                 daemon: bool = False):
        self._name = name
        self.agent_def = agent_def
        self.logger = logging.getLogger('pydcop.agent.' + name)

        # Setup communication and discovery.
        # `_comm` must be set before `self.address` is read.
        self._comm = comm
        self.discovery = Discovery(self._name, self.address)
        self._comm.discovery = self.discovery
        self._messaging = Messaging(name, comm)

        # Ui server
        self._ui_port = ui_port
        self._ui_server = None

        self.t = Thread(target=self._run, name='thread_' + name)
        self.t.daemon = daemon
        self._stopping = threading.Event()
        self._running = False
        # _idle means that we have finished to handle all incoming messages
        self._idle = False

        self._computations = {}  # type: Dict[str, MessagePassingComputation]

        # Cumulated time (seconds) spent starting computations and handling
        # messages.
        self.t_active = 0
        # perf_counter() timestamp of the first call to run()
        self._run_t = None

        self._periodic_cb = None
        self._period = 1000

        # List of paused computations, any computation whose name is in this
        # list will not receive any message.
        self.paused_computations = []

    @property
    def communication(self) -> CommunicationLayer:
        """
        The communication used by this agent.

        Returns
        -------
        CommunicationLayer
            The communication used by this agent.
        """
        return self._comm

    def add_computation(self, computation: MessagePassingComputation,
                        comp_name=None):
        """
        Add a computation to the agent.

        The computation will run on this agent thread and receives messages
        through his Messaging and CommunicationLayer.

        Parameters
        ----------
        computation: a MessagePassingComputation
            the computation to be added

        comp_name: str
            an optional name for the computation, if not given
            computation.name will be used.

        """
        comp_name = computation.name if comp_name is None else comp_name
        self.logger.debug('Add computation %s - %s ',
                          comp_name, self._messaging)
        computation.message_sender = self._messaging.post_msg

        self._computations[comp_name] = computation
        self.discovery.register_computation(comp_name, self.name,
                                            self.address)

        # start lookup for agent hosting a neighbor computation
        if hasattr(computation, 'computation_def') and \
                computation.computation_def is not None:
            for n in computation.computation_def.node.neighbors:
                self.discovery.subscribe_computation(n)

        # Wrap the computation's hooks so that the agent's own callbacks are
        # notified as well.
        if hasattr(computation, '_on_value_selection'):
            computation._on_value_selection = notify_wrap(
                computation._on_value_selection,
                partial(self._on_computation_value_changed,
                        computation.name))
        if hasattr(computation, '_on_new_cycle'):
            computation._on_new_cycle = notify_wrap(
                computation._on_new_cycle,
                partial(self._on_computation_new_cycle, computation.name))

        computation.finished = notify_wrap(
            computation.finished,
            partial(self._on_computation_finished, computation.name))

    def remove_computation(self, computation: str) -> None:
        """
        Removes a computation from the agent.

        A computation that is still running is stopped first.

        Parameters
        ----------
        computation: str
            the name of the computation

        Raises
        ------
        UnknownComputation
            If there is no computation with this name on this agent

        """
        try:
            comp = self._computations.pop(computation)
        except KeyError:
            self.logger.error(
                'Removing unknown computation %s - current commutations : %s',
                computation, self._computations)
            raise UnknownComputation(computation)
        if comp.is_running:
            comp.stop()
        self.logger.debug('Removing computation %s', comp)
        self.discovery.unregister_computation(computation, self.name)

    def computations(self, include_technical=False) -> \
            List[MessagePassingComputation]:
        """
        Computations hosted on this agent.

        Parameters
        ----------
        include_technical: bool
            If True, technical computations (like discovery, etc.) are
            included in the list. Technical computations are those whose
            name starts with '_'.

        Returns
        -------
        List[MessagePassingComputation]
            A list of computations hosted on this agents. This list is a copy
            and can be safely modified.

        """
        if include_technical:
            return list(self._computations.values())
        else:
            return [
                c for c in self._computations.values()
                if not c.name.startswith('_')
            ]

    def computation(self, name: str) -> MessagePassingComputation:
        """
        Get a computation hosted by this agent.

        Parameters
        ----------
        name: str
            The name of the computation.

        Returns
        -------
        The message passing computation corresponding to the given name.

        Raises
        ------
        UnknownComputation
            if the agent has no computation with this name.

        See Also
        --------
        add_computation
        """
        try:
            return self._computations[name]
        except KeyError:
            self.logger.error('unknown computation %s', name)
            raise UnknownComputation('unknown computation ' + name)

    @property
    def address(self):
        """
        The address this agent can be reached at.

        The type of the address depends on the instance and type of the
        CommunicationLayer used by this agent.

        Returns
        -------
        The address this agent can be reached at.
        """
        return self._comm.address

    def start(self):
        """
        Starts the agent.

        Once started, an agent will dispatch any received message to the
        corresponding target computation.

        Notes
        -----
        Each agent has its own thread, this will start the agent's thread,
        run the _on_start callback and wait for messages. Incoming messages
        are added to a queue and handled by calling the _handle_message
        callback.

        The agent (and its thread) will stop once stop() has been called and
        it has finished handling the current message, if any.

        Raises
        ------
        AgentException
            If the agent is already running.

        See Also
        --------
        _on_start(), stop()

        """
        if self.is_running:
            raise AgentException(
                'Cannot start agent {}, already running '.format(self.name))
        self.logger.info('Starting agent %s ', self.name)
        self._running = True
        self.add_computation(self.discovery.discovery_computation)
        self.t.start()
        self.discovery.register_agent(self.name, self.address)
        self.run(self.discovery.discovery_computation.name)

    def run(self, computations: Optional[Union[str, List[str]]] = None):
        """
        Run computations hosted on this agent.

        Notes
        -----
        Attempting to start an already running computation is harmless : it
        will be logged but will not raise an exception.

        The first time this method is called, a timestamp is stored, which is
        used as a reference when computing metrics.

        Parameters
        ----------
        computations: Optional[Union[str, List[str]]]
            An optional computation name or list of computation names. If
            None, all computations hosted on this agent are started.

        Raises
        ------
        AgentException
            If the agent was not started (using agt.start()) before calling
            run().
        UnknownComputation
            If some of the computations are not hosted on this agent. All
            computations really hosted on the agent are started before
            raising this Exception.

        """
        if not self.is_running:
            # Exceptions do not interpolate logging-style arguments: build
            # the message explicitly.
            raise AgentException(
                'Cannot start computation on agent {} which '
                'is not started'.format(self.name))

        if computations is None:
            self.logger.info('Starting all computations')
        else:
            if isinstance(computations, str):
                computations = [computations]
            else:
                # avoid modifying caller's variable
                computations = computations[:]
            self.logger.info('Starting computations %s', computations)

        if self._run_t is None:
            self._run_t = perf_counter()

        on_start_t = perf_counter()
        for c in self._computations.values():
            if computations is not None:
                if c.name not in computations:
                    continue
                # Found on this agent: whatever remains in the list at the
                # end is unknown.
                computations.remove(c.name)
            if c.is_running:
                self.logger.debug(
                    'Do not start computation %s, already running', c.name)
            else:
                c.start()
        # add the time spent in on_start to the active time of the agent.
        self.t_active += perf_counter() - on_start_t

        if computations:
            raise UnknownComputation(
                'Could not start unknown computation {}'.format(computations))

    @property
    def start_time(self) -> float:
        """
        float: timestamp for the first run computation call. This timestamp
        is used as a reference when computing various time-related metrics.
        """
        return self._run_t

    def stop(self):
        """
        Stops the agent

        A computation cannot be interrupted while it handles a message, as a
        consequence the agent (and its thread) will stop once it has finished
        handling the current message, if any.
        """
        self.logger.debug('Stop requested on %s', self.name)
        self._stopping.set()

    def pause_computations(self,
                           computations: Union[str, Optional[List[str]]]):
        """
        Pauses computations.

        Parameters
        ----------
        computations:  Union[str, Optional[List[str]]]
            The name of the computation to pause, or a list of computations
            names. If None, all hosted computation will be paused.

        Raises
        ------
        AgentException
            If the agent was not started (using agt.start()) before calling
            pause_computations().
        UnknownComputation
            If some of the computations are not hosted on this agent. All
            computations really hosted on the agent are paused before raising
            this exception.

        """
        if not self.is_running:
            raise AgentException(
                'Cannot pause computations on agent {} which '
                'is not started'.format(self.name))

        if computations is None:
            self.logger.info('Pausing all computations')
        else:
            if isinstance(computations, str):
                computations = [computations]
            else:
                # avoid modifying caller's variable
                computations = computations[:]
            self.logger.info('Pausing computations %s', computations)

        for c in self._computations.values():
            if computations is not None:
                if c.name not in computations:
                    continue
                computations.remove(c.name)
            if c.is_paused:
                self.logger.warning(
                    'Cannot pause computation %s, already '
                    'paused', c.name)
            else:
                c.pause(True)

        if computations:
            raise UnknownComputation(
                'Could not pause unknown computation {}'.format(computations))

    def unpause_computations(self,
                             computations: Union[str, Optional[List[str]]]):
        """
        Un-pause (i.e. resume) computations

        Parameters
        ----------
        computations: Union[str, Optional[List[str]]]
            The name of the computation to resume, or a list of computations
            names. If None, all hosted computations will be resumed.

        Raises
        ------
        AgentException
            If the agent was not started (using agt.start()) before calling
            unpause_computations().
        UnknownComputation
            If some of the computations are not hosted on this agent. All
            computations really hosted on the agent are resumed before
            raising this exception.

        """
        if not self.is_running:
            raise AgentException(
                'Cannot resume computations on agent {} which '
                'is not started'.format(self.name))

        if computations is None:
            self.logger.info('Resuming all computations')
        else:
            if isinstance(computations, str):
                computations = [computations]
            else:
                # avoid modifying caller's variable
                computations = computations[:]
            self.logger.info('Resuming computations %s', computations)

        for c in self._computations.values():
            if computations is not None:
                if c.name not in computations:
                    continue
                computations.remove(c.name)
            if not c.is_paused:
                self.logger.warning(
                    'Do not resume computation %s, not '
                    'paused', c.name)
            else:
                c.pause(False)

        if computations:
            raise UnknownComputation(
                'Could not resume unknown computation {}'.format(
                    computations))

    @property
    def name(self):
        """
        str: The name of the agent.
        """
        return self._name

    @property
    def is_stopping(self) -> bool:
        """
        bool: True if the agent is currently stopping (i.e. handling its last
        message).
        """
        return self._stopping.is_set()

    @property
    def is_running(self):
        """
        bool: True if the agent is currently running.
        """
        return self._running

    def _on_start(self):
        """
        This method is called when the agent starts.

        It is meant to be overwritten in subclasses that might need to
        perform some operations on startup. Do NOT forget to call
        `super()._on_start()` !

        Notes
        -----
        This method is always run in the agent's thread, even though the
        `start()` method is called from an other thread.

        """
        self.logger.debug('on_start for {}'.format(self.name))

        # The ui server is only created when a port was given.
        if self._ui_port:
            self._ui_server = UiServer(self, self._ui_port)
            self.add_computation(self._ui_server)
            self._ui_server.start()

    def _on_stop(self):
        """
        This method is called when the agent has stopped.

        It is meant to be overwritten in subclasses that might need to
        perform some operations on stop, however, when overwriting it,
        you MUST call `super()._on_stop()`.

        Notes
        -----
        This method always run in the agent's thread. Messages can still be
        sent in this method, but no new message will be received (as the
        agent's thread has stopped)

        """
        self.logger.debug('on_stop for %s with computations %s ',
                          self.name, self.computations())

        # Unregister computations and agent from discovery.
        # This will also unregister any discovery callbacks this agent may
        # still have.
        for comp in self.computations():
            comp.stop()
            if not _is_technical(comp.name):
                self.discovery.unregister_computation(comp.name)

        self.discovery.unregister_agent(self.name)

    def _on_computation_value_changed(self, computation: str, value,
                                      cost, cycle):
        """Called when a computation selects a new value """
        pass

    def _on_computation_new_cycle(self, computation, *args, **kwargs):
        """Called when a computation starts a new cycle"""
        pass

    def _on_computation_finished(self, computation: str,
                                 *args, **kwargs):
        """
        Called when a computation finishes.

        This method is meant to be overwritten in sub-classes.

        Parameters
        ----------
        computation: str
            name of the computation that just ended.
        """
        pass

    def _handle_message(self, sender_name: str, dest_name: str, msg, t):
        """
        Deliver a message to the hosted computation named `dest_name`.

        Raises
        ------
        UnknownComputation
            if `dest_name` is not hosted on this agent.
        """
        # messages are delivered even to computations which have reached
        # their stop condition. It's up the the algorithm to decide if it
        # wants to handle the message.
        dest = self.computation(dest_name)
        dest.on_message(sender_name, msg, t)

    def metrics(self):
        """
        Build a dict of metrics for this agent.

        Returns
        -------
        dict
            with keys 'count_ext_msg', 'size_ext_msg', 'last_msg_time'
            (copied from the agent's messaging), 'active' (cumulated active
            time), 'idle' (time elapsed since the first call to run() minus
            the active time, 0 if run() was never called) and 'cycles'
            (cycle count of each non-technical computation).
        """
        if self._run_t is None:
            idle = 0
        else:
            # `_run_t` is an absolute perf_counter() timestamp while
            # `t_active` is a duration: idle time is the time elapsed since
            # `_run_t` that was not spent being active.
            idle = perf_counter() - self._run_t - self.t_active
        m = {
            'count_ext_msg': dict(self._messaging.count_ext_msg),
            'size_ext_msg': dict(self._messaging.size_ext_msg),
            'last_msg_time': self._messaging.last_msg_time,
            'active': self.t_active,
            'idle': idle,
            'cycles': {c.name: c.cycle_count for c in self.computations()}
        }
        return m

    def set_periodic_action(self, period: float, cb: Callable):
        """
        Set a periodic action.

        The callback `cb` will be called every `period` seconds. The delay
        is not strict. The handling of a message is never interrupted,
        if it takes longer than `period`, the callback will be delayed and
        will only be called once the task has finished.

        Parameters
        ----------
        period: float
            a period in second
        cb: Callable
            a callback with no argument

        Returns
        -------
        Callable
            the callback `cb`, unchanged.
        """
        self._periodic_cb = cb
        self._period = period
        return cb

    def _run(self):
        """
        Main loop of the agent, target of the agent's thread.

        Calls `_on_start`, then handles incoming messages and the periodic
        action until a stop is requested. Whatever the way the loop exits,
        the communication layer is shut down and `_on_stop` is called.
        """
        self.logger.debug('Running agent ' + self._name)
        full_msg = None
        last_cb_time = perf_counter()
        try:
            self._running = True
            self._on_start()
            while not self._stopping.is_set():
                # Process messages, if any
                full_msg, t = self._messaging.next_msg(0.05)
                if full_msg is None:
                    self._idle = True
                else:
                    current_t = perf_counter()
                    try:
                        sender, dest, msg, _ = full_msg
                        self._idle = False
                        if not self._stopping.is_set():
                            self._handle_message(sender, dest, msg, t)
                    finally:
                        # Only account for active time once computations
                        # have been run.
                        if self._run_t is not None:
                            e = perf_counter()
                            msg_duration = e - current_t
                            self.t_active += msg_duration
                            if msg_duration > 1:
                                self.logger.warning(
                                    'Long message handling (%s) : %s',
                                    msg_duration, msg)

                # Process periodic action. Only once the agents runs the
                # computations (i.e. self._run_t is not None)
                ct = perf_counter()
                if self._run_t is not None \
                        and self._periodic_cb is not None \
                        and ct - last_cb_time >= self._period:
                    self.logger.warning('periodic cb %s %s ', ct,
                                        last_cb_time)
                    self._periodic_cb()
                    last_cb_time = ct

        except Exception as e:
            self.logger.error(
                'Thread %s exits With error : %s \n '
                'Was handling message %s ',
                self.name, e, full_msg)
            self.logger.error(traceback.format_exc())
            if hasattr(self, 'on_fatal_error'):
                self.on_fatal_error(e)

        except BaseException:  # deliberately catch *all* exceptions
            e = sys.exc_info()[0]
            self.logger.error('Thread exits With un-managed error : %s', e)
            self.logger.error(e)
        finally:
            self._running = False
            self._comm.shutdown()
            self._on_stop()
            self.logger.info('Thread of agent %s stopped', self._name)

    def is_idle(self):
        """
        Indicate if the agent is idle. An idle agent is an agent which has
        no pending messages to handle.

        :return: True if the agent is idle, False otherwise
        """
        return self._idle

    def __str__(self):
        return 'Agent: ' + self._name

    def __repr__(self):
        return 'Agent: ' + self._name
def test_ignore_when_sending_to_unknown_agent_ignore_on_send(self):
    """With on_error='ignore', sending to an unknown agent reports success."""
    sender_layer = InProcessCommunicationLayer()
    sender_layer.discovery = Discovery('a1', sender_layer)
    payload = ('c1', 'c2', 'msg')

    # 'a2' is not registered, yet send_msg must return a truthy value.
    outcome = sender_layer.send_msg('a1', 'a2', payload, on_error='ignore')
    assert outcome
def setUp(self):
    """
    Build the fixtures: a 3x2 grid computation graph (`self.cg`, with links
    `self.l1` .. `self.l7`) and a discovery (`self.discovery`) holding the
    hosting agent and two replicas for each of the 6 computations.
    """
    # Grid layout:   c1 - c2 - c3
    #                |    |    |
    #                c4 - c5 - c6
    self.l1 = Link(['c1', 'c2'])
    self.l2 = Link(['c2', 'c3'])
    self.l3 = Link(['c1', 'c4'])
    self.l4 = Link(['c2', 'c5'])
    self.l5 = Link(['c3', 'c6'])
    self.l6 = Link(['c4', 'c5'])
    self.l7 = Link(['c5', 'c6'])
    self.links = [
        self.l1, self.l2, self.l3, self.l4, self.l5, self.l6, self.l7
    ]

    # One node per computation, carrying only the links it takes part in.
    comp_names = ['c' + str(i) for i in range(1, 7)]
    nodes = {
        comp_name: ComputationNode(
            comp_name, 'test',
            links=[link for link in self.links
                   if link.has_node(comp_name)])
        for comp_name in comp_names
    }
    self.cg = ComputationGraph('test', nodes=nodes.values())

    # 6 agents hosting these computations (note that c6 is on a8)
    hosting = [
        ('c1', 'a1', 'addr1'),
        ('c2', 'a2', 'addr2'),
        ('c3', 'a3', 'addr3'),
        ('c4', 'a4', 'addr4'),
        ('c5', 'a5', 'addr5'),
        ('c6', 'a8', 'addr8'),
    ]
    # and the corresponding replica, 2 for each computation
    replicas = [
        ('c1', ('a2', 'a5')),
        ('c2', ('a3', 'a6')),
        ('c3', ('a1', 'a4')),
        ('c4', ('a2', 'a5')),
        ('c5', ('a3', 'a6')),
        ('c6', ('a1', 'a4')),
    ]

    d = Discovery('a1', 'addr1')
    for comp_name, agent_name, agent_addr in hosting:
        d.register_computation(comp_name, agent_name, agent_addr,
                               publish=False)
    for comp_name, replica_agents in replicas:
        for agent_name in replica_agents:
            d.register_replica(comp_name, agent_name)

    self.discovery = d