def test_several_periodic_action_on_computation():
    """Two periodic actions with different periods both fire on one computation.

    The computation registers a 0.1 s action and a 0.2 s action when it
    starts; after running for 0.25 s the first must have been called once or
    twice and the second exactly once.
    """
    agent = Agent("a", MagicMock())

    class TwoActionsComputation(MessagePassingComputation):
        """Computation counting the calls of two periodic actions."""

        def __init__(self):
            super().__init__("test")
            self.mock1 = MagicMock()
            self.mock2 = MagicMock()

        def on_start(self):
            # Registration order matters only for readability: fast, then slow.
            for period, callback in ((0.1, self.action1), (0.2, self.action2)):
                self.add_periodic_action(period, callback)

        def action1(self):
            self.mock1()

        def action2(self):
            self.mock2()

    computation = TwoActionsComputation()
    agent.add_computation(computation)
    agent.start()
    agent.run()
    sleep(0.25)
    agent.stop()

    assert 1 <= computation.mock1.call_count <= 2
    assert computation.mock2.call_count == 1
def test_periodic_action_not_called_when_paused():
    """A periodic action must not fire while its computation is paused.

    The action (period 0.1 s) is first checked to run while the computation
    is running, then the computation is paused and the action must not be
    called during the following 0.25 s.
    """
    a = Agent("a", MagicMock())

    class TestComputation(MessagePassingComputation):
        """Computation counting the calls of a single periodic action."""

        def __init__(self):
            super().__init__("test")
            self.mock = MagicMock()

        def on_start(self):
            self.add_periodic_action(0.1, self.action)

        def action(self):
            self.mock()

    c = TestComputation()

    a.add_computation(c)
    a.start()
    try:
        a.run()
        sleep(0.25)
        assert 1 <= c.mock.call_count <= 2
        c.mock.reset_mock()

        a.pause_computations("test")
        sleep(0.25)
        assert c.mock.call_count == 0
    finally:
        # Stop the agent even when an assertion fails, so a failing test
        # does not leave a running agent behind for the following tests.
        a.stop()
def test_oneshot_delayed_action_on_computation():
    """A periodic action that removes itself behaves as a one-shot action."""
    # To implement a one-shot action, add a periodic action and remove it
    # the first time it is called:
    a = Agent("a", MagicMock())

    class TestComputation(MessagePassingComputation):
        """Computation whose periodic action unregisters itself when called."""

        def __init__(self):
            super().__init__("test")
            self.mock = MagicMock()

        def on_start(self):
            # Keep the handle: it is needed to remove the action later.
            self.handle = self.add_periodic_action(0.1, self.action)

        def action(self):
            self.mock()
            self.remove_periodic_action(self.handle)

    c = TestComputation()

    a.add_computation(c)
    a.start()
    try:
        a.run()
        sleep(0.25)

        # The action is removed on its first call: there must be exactly
        # one call.
        assert c.mock.call_count == 1
        c.mock.reset_mock()
        sleep(0.2)

        c.mock.assert_not_called()
    finally:
        # Stop the agent even when an assertion fails, so a failing test
        # does not leave a running agent behind for the following tests.
        a.stop()
def test_remove_periodic_action_on_computation():
    """A periodic action is no longer called once it has been removed."""
    a = Agent("a", MagicMock())

    class TestComputation(MessagePassingComputation):
        """Computation exposing a hook to remove its periodic action."""

        def __init__(self):
            super().__init__("test")
            self.mock = MagicMock()

        def on_start(self):
            # Keep the handle: it is needed to remove the action later.
            self.handle = self.add_periodic_action(0.1, self.action)

        def action(self):
            self.mock()

        def test_remove(self):
            self.remove_periodic_action(self.handle)

    c = TestComputation()

    a.add_computation(c)
    a.start()
    try:
        a.run()
        sleep(0.25)

        assert c.mock.call_count == 2

        c.test_remove()
        c.mock.reset_mock()
        sleep(0.5)

        c.mock.assert_not_called()
    finally:
        # Stop the agent even when an assertion fails, so a failing test
        # does not leave a running agent behind for the following tests.
        a.stop()
def directory_discovery():
    """Yield a directory agent and two standard agents, then clean them up.

    Setup: a started 'agt_dir' agent hosting and running the directory
    computation, and two started agents ('agt1', 'agt2') configured to use
    that directory.

    Teardown (after the yield): every computation of the two standard agents
    is removed and unregistered from their discovery, `wait_run()` is called,
    and the three agents are stopped.

    NOTE(review): presumably used as a pytest fixture -- the decorator is not
    visible here.
    """
    # Agent hosting the directory
    dir_agent = Agent('agt_dir', InProcessCommunicationLayer())
    directory = Directory(dir_agent.discovery)
    dir_agent.add_computation(directory.directory_computation)
    dir_agent.discovery.use_directory('agt_dir', dir_agent.address)
    dir_agent.start()
    dir_agent.run(directory.directory_computation.name)

    # standard agents
    workers = []
    for agent_name in ('agt1', 'agt2'):
        worker = Agent(agent_name, InProcessCommunicationLayer())
        worker.discovery.use_directory('agt_dir', dir_agent.address)
        worker.start()
        workers.append(worker)
    first, second = workers

    yield dir_agent, first, second

    # Teardown: drop hosted computations and their registrations, one agent
    # at a time.
    for worker in workers:
        for computation in worker.computations():
            worker.remove_computation(computation.name)
        for registered in worker.discovery.agent_computations(worker.name):
            worker.discovery.unregister_computation(registered)

    wait_run()

    for worker in workers:
        worker.stop()
    dir_agent.stop()
def test_periodic_action_on_computation():
    """A 0.1 s periodic action is called twice within 0.25 s of running."""
    agent = Agent("a", MagicMock())

    class PeriodicComputation(MessagePassingComputation):
        """Computation counting the calls of a single periodic action."""

        def __init__(self):
            super().__init__("test")
            self.mock = MagicMock()

        def on_start(self):
            self.add_periodic_action(0.1, self.action)

        def action(self):
            self.mock()

    computation = PeriodicComputation()
    agent.add_computation(computation)

    agent.start()
    agent.run()
    sleep(0.25)
    agent.stop()

    assert computation.mock.call_count == 2
def agents():
    """Yield a directory agent and two standard agents, then stop them.

    Setup: a started 'agt_dir' agent hosting and running the directory
    computation, and two started agents ('agt1', 'agt2') configured to use
    that directory. After the yield, the two standard agents are stopped
    first, then the directory agent.

    NOTE(review): presumably used as a pytest fixture -- the decorator is not
    visible here.
    """
    # Agent hosting the directory
    dir_agent = Agent('agt_dir', InProcessCommunicationLayer())
    directory = Directory(dir_agent.discovery)
    dir_agent.add_computation(directory.directory_computation)
    dir_agent.discovery.use_directory('agt_dir', dir_agent.address)
    dir_agent.start()
    dir_agent.run(directory.directory_computation.name)

    # standard agents
    workers = []
    for agent_name in ('agt1', 'agt2'):
        worker = Agent(agent_name, InProcessCommunicationLayer())
        worker.discovery.use_directory('agt_dir', dir_agent.address)
        worker.start()
        workers.append(worker)
    first, second = workers

    yield dir_agent, first, second

    for worker in workers:
        worker.stop()
    dir_agent.stop()
def agent():
    """Yield a single 'agt1' agent (not started) and stop it afterwards.

    NOTE(review): presumably used as a pytest fixture -- the decorator is not
    visible here.
    """
    single_agent = Agent('agt1', InProcessCommunicationLayer())
    yield single_agent
    single_agent.stop()
class Orchestrator(object):
    """
    Centralized organisation of the set of agents used to solve a dcop.

    Notes
    -----
    Central orchestration is only used for bootstrapping the system and to
    collect metrics.
    As the orchestrator will generally run in a separate process,
    it uses an agent object and communicates with other agents using messages.

    Main responsibilities:

    * deploying the computations
    * collecting metrics
    * running and stopping agents (note that the orchestrator does not
      create nor start agents, it just request them to run their
      computations)

    A typical use scenario:

    * create and start the agents for the dcop (thread or process based)
    * create the orchestrator, giving him the dcop and its distribution on
      agents
    * deploy the computations
    * run the computations
    * stop everything

    Examples
    --------

        orchestrator.start()
        orchestrator.deploy_computations()
        orchestrator.start_replication(k_target)  # only for resilient system
        orchestrator.run()
        orchestrator.stop_agents(timeout)
        orchestrator.stop()

    Parameters
    ----------
    algo: AlgorithmDef,
        algorithm used to solve the dcop
    cg: ComputationGraph,
        computation graph
    agent_mapping: Distribution,
        initial distribution of computations on agents
    comm: CommunicationLayer,
        An instance of communication layer object
    dcop: DCOP
        The DCOP
    infinity: float
        value used as infinity (defaults to ``float('inf')``), forwarded to
        the management computation
    collector: Queue
        A queue used to collect metrics
    collect_moment: str
        metrics collection mode (e.g. 'value_change')
    collect_period: float
        metrics collection period, forwarded to the management computation
    ui_port: int
        forwarded to the orchestrator's own agent as its ``ui_port``
    """

    def __init__(self, algo: AlgorithmDef, cg: ComputationGraph,
                 agent_mapping: Distribution,
                 comm: CommunicationLayer,
                 dcop: DCOP,
                 infinity=float('inf'),
                 collector: Queue=None,
                 collect_moment: str='value_change',
                 collect_period: float=None,
                 ui_port: int = None):
        # The orchestrator runs on its own agent, which also hosts the
        # directory used by all other agents for discovery.
        self._own_agt = Agent(ORCHESTRATOR, comm, ui_port=ui_port)
        self.directory = Directory(self._own_agt.discovery)
        self._own_agt.add_computation(self.directory.directory_computation)
        self._own_agt.discovery.use_directory(ORCHESTRATOR,
                                              self._own_agt.address)
        self.discovery = self._own_agt.discovery
        self.messaging = comm.messaging

        self.logger = self._own_agt.logger
        self.dcop = dcop

        self.status = 'OK'

        # Set by `run()`; initialised here so that the attribute can be read
        # safely before the first call to `run()`.
        self.repair_only = False

        # For scenario execution
        self._events_iterator = None
        self._event_timer = None  # type: threading.Timer
        self._timeout_timer = None

        self._stopping = threading.Event()

        self.mgt = AgentsMgt(algo, cg, agent_mapping, dcop,
                             self._own_agt, self, infinity,
                             collector=collector,
                             collect_moment=collect_moment,
                             collect_period=collect_period)

    @property
    def address(self):
        """Address of the orchestrator's own agent."""
        return self._own_agt.address

    def set_error_handler(self, callback: Callable):
        """
        Set a callback that will be called if the orchestrator thread
        stops due to an unexpected error.

        Parameters
        ----------
        callback: a single-argument callable
            the callback must accept a single argument, which will be the
            exception that caused the orchestrator thread to stop.
        """
        self._own_agt.on_fatal_error = callback

    def start(self):
        """
        Start the orchestrator.

        Notes
        -----
        The orchestrator, and its directory, must be started in order to
        receive registration messages, which means you must always start the
        orchestrator before the agents.
        """
        self._own_agt.start()
        self._own_agt.run(self.directory.directory_computation.name)
        # The management computation is only added once the agent and its
        # directory are running.
        self._own_agt.add_computation(self.mgt, ORCHESTRATOR_MGT)
        self._own_agt.run(self.mgt.name)

    def stop(self):
        """
        Stop the orchestrator.

        Notes
        -----
        Once stopped, the orchestrator will not receive nor send any new
        message. This means that agents must be stopped before stopping the
        orchestrator.
        """
        self.logger.info('Requesting orchestrator to stop')
        self._own_agt.stop()
        # Make sure no pending scenario event fires after the stop.
        if self._event_timer is not None:
            self._event_timer.cancel()
            self._event_timer = None

    def deploy_computations(self, once_registered=True):
        """
        Deploy the computation for the dcop.

        The computations are deployed according to the Computation Graph
        and the initial distribution (given to the Orchestrator's constructor).

        Parameters
        ----------
        once_registered: bool
            wait until all agents have registered before starting deployment.
        """
        if once_registered:
            self.logger.info('Waiting for all registration before deploying '
                             'computations')
            self.mgt.all_registered.wait()

        self.logger.info('deploying computations')
        self._mgt_method('_orchestrator_deploy_computations', None)

    def start_replication(self, k_target: int):
        """
        Ask all agents to replicate their computations.

        Notes
        -----
        deploy_computations must be called before, otherwise agents have no
        computation to replicate !

        Parameters
        ----------
        k_target: int
            number of replica for each computation (aka resiliency level).
        """
        # We must be sure computations have been deployed first
        self.logger.info('Waiting until agents are ready to run before '
                         'starting replication')
        self.mgt.ready_to_run.wait()
        # Replace the event with a fresh (unset) one so that later waits
        # block until it is set again.
        self.mgt.ready_to_run = threading.Event()

        self.logger.info('Starting replication')
        self._mgt_method('_orchestrator_start_replication', k_target)

    def run(self, scenario: Scenario=None,
            timeout: Optional[float]=None,
            repair_only=False):
        """Run the DCOP, with a scenario if given.

        When `run()` is called, the orchestrator asks all orchestrated
        agents to start their computations. If the agents are not ready,
        the orchestrator automatically waits until agents are ready
        (i.e. computations have been deployed).

        This call blocks until the agents have stopped and the
        orchestrator's own agent has shut down.

        Parameters
        ----------
        scenario: Scenario
            an optional Scenario object whose events will be injected into
            the system.
        timeout: float
            time, in seconds, after which all agents, and the orchestrator
            itself, must be stopped.
        repair_only: bool
            stored as `self.repair_only`; it is not used by the orchestrator
            itself (presumably read by the management computation --
            TODO confirm).
        """
        self.repair_only = repair_only
        self.logger.info('Waiting until agents are ready to run')
        self.mgt.ready_to_run.wait()
        self.logger.info('Requesting agents to run')
        self._mgt_method('_orchestrator_run_computations', None)

        if timeout is not None:
            self.logger.info('Setting timer for %s timeour ', timeout)
            self._timeout_timer = threading.Timer(timeout, self._on_timeout)
            # Daemon timer: a pending timeout never prevents the process
            # from exiting.
            self._timeout_timer.daemon = True
            self._timeout_timer.start()
            # Fresh event: `wait_ready()` blocks until it is set again.
            self.mgt.ready_to_run = threading.Event()
        else:
            self.logger.info('Not timeout, stop with ctrl+c or on algo end ')

        if scenario is not None:
            self.logger.info('Setting scenario ')
            self._events_iterator = iter(scenario)
            self._process_event()
        else:
            self.logger.info('No scenario ')

        self.mgt.wait_stop_agents()
        self._own_agt.clean_shutdown()
        self._own_agt.join()

    def stop_agents(self, timeout: float):
        """
        Request all orchestrated agents to stop.

        Any pending scenario event is cancelled, the stop request is sent to
        the management computation and the call then waits for the agents
        to stop.

        Parameters
        ----------
        timeout: float
            passed to the management computation's `wait_stop_agents`.
        """
        self.logger.info('Requesting all agents to stop')
        self._stopping.set()
        # WARNING: must NOT access the mgt directly, all its action
        # must be done in the agent's tread. That's the reason we use a msg
        # here. It must have MSG_MGT type to have higher priority, in case the
        # orchestrator's queue is full of other messages.
        if self._event_timer is not None:
            self._event_timer.cancel()
            self._event_timer = None
        self._mgt_method('_orchestrator_stop_agents', None)
        self.mgt.wait_stop_agents(timeout)
        # Unblock callers waiting in `wait_ready()`.
        self.mgt.ready_to_run.set()

    def current_global_cost(self):
        """Return the current global cost, as given by the management
        computation."""
        return self.mgt.current_global_cost()

    def current_solution(self):
        """Return the current solution, as given by the management
        computation."""
        return self.mgt.current_solution()

    def end_metrics(self):
        """Return the global metrics for the 'END' event, computed by the
        management computation with its `last_agt_stop_time`."""
        return self.mgt.global_metrics('END', self.mgt.last_agt_stop_time)

    def replication_metrics(self):
        """Return the replication metrics held by the management
        computation."""
        return self.mgt._replication_metrics

    def wait_ready(self):
        """Blocks until the Orchestrator is ready to perform another action.

        This can be used to wait until the dcop has finished running,
        for example when using a timeout.

        Notes
        -----
        When calling `wait_ready` after `run()` with a timeout, you may be
        blocked for a longer time than the timeout, as orchestrator also
        wait until all agents have stopped.

        Returns
        -------
        bool
            True if the orchestrator's agent is running and no stop has been
            requested through `stop_agents`.

        Examples
        --------

            orchestrator.run(timeout=5)
            orchestrator.wait_ready()

        """
        self.mgt.ready_to_run.wait()
        return self._own_agt.is_running and not self._stopping.is_set()

    def _process_event(self):
        """Inject the pending scenario events into the system.

        Events are posted to the management computation one after the
        other until either:

        * some agent is not in the 'running' state: processing is retried
          20 seconds later,
        * a delay event is met: processing resumes after that delay,
        * the scenario is exhausted.

        Consecutive events are handled in a loop (not by recursion) so that
        a scenario with a long run of events without delay cannot exceed the
        interpreter's recursion limit.
        """
        while True:
            # FIXME: hack too avoid overlapping events
            waited = [a for a, state in self.mgt._agts_state.items()
                      if state != 'running']
            if waited:
                self.logger.warning(f"Event while agents {waited} are still processing"
                                    f" previous event, wait 20 s ")
                self._event_timer = threading.Timer(20, self._process_event)
                self._event_timer.start()
                return

            try:
                evt = next(self._events_iterator)
            except StopIteration:
                self.logger.info("All events processed for scenario")
                self._events_iterator = None
                return

            if evt.is_delay:
                self.logger.info('Delay: wait %s s for next event', evt.delay)
                self._event_timer = threading.Timer(evt.delay,
                                                    self._process_event)
                self._event_timer.start()
                return

            self.logger.info('posting event to mgt %s', evt)
            self._mgt_method('_orchestrator_scenario_event', evt)

    def _mgt_method(self, method: str, arg: Any):
        """Post a message to the management computation.

        The request is sent as a message (from and to ORCHESTRATOR_MGT)
        rather than calling the management computation directly.

        Parameters
        ----------
        method: str
            type of the message posted
        arg: Any
            content of the message
        """
        # NOTE(review): 5 is presumably the value of MSG_MGT -- confirm.
        self.messaging.post_msg(
            ORCHESTRATOR_MGT, ORCHESTRATOR_MGT,
            Message(method, arg), msg_type=5)

    def _on_timeout(self):
        """Run timeout callback"""
        self.status = "TIMEOUT"
        self.logger.info("Timeout, requesting agents to stop")
        self.stop_agents(5)
        self.mgt.ready_to_run.set()