def test_removal_candidate_agents(self):
    """Check the candidate agents reported when some agents leave.

    Each scenario pairs a list of departed agents with the set of agent
    names that ``_removal_candidate_agents`` is expected to return for
    it, using the discovery fixture held by the test case.
    """
    scenarios = (
        # a1 is removed
        (['a1'], {'a2', 'a5'}),
        # both a1 and a2 are removed
        (['a1', 'a2'], {'a3', 'a6', 'a5'}),
    )
    for departed, expected in scenarios:
        candidates = _removal_candidate_agents(departed, self.discovery)
        self.assertSetEqual(set(candidates), expected)
def _agents_removal(self, leaving_agents: List[str]):
    """Kick off the repair setup after some agents left the system.

    Looks up the candidate agents and the computations orphaned by the
    departure, marks every orphaned computation as having no known state
    (``None``) and sends a ``SetupRepairMessage`` to each candidate.

    :param leaving_agents: names of the agents that left the system.
    """
    # The other agents are told here who left the system; this stands in
    # for a proper discovery mechanism.
    candidates = _removal_candidate_agents(leaving_agents, self.discovery)
    orphans = _removal_orphaned_computations(leaving_agents, self.discovery)

    # Map each orphaned computation to the agents holding one of its
    # replicas.
    replicas_by_orphan = {}
    for comp in orphans:
        replicas_by_orphan[comp] = self.discovery.replica_agents(comp)

    self.logger.info(
        'On removal of agents %s, orphaned computations: %s '
        'with candidates %s',
        leaving_agents, replicas_by_orphan, candidates)

    # A computation without any replica cannot be repaired: report it.
    unrepairable = [
        comp for comp, hosts in replicas_by_orphan.items() if not hosts
    ]
    for comp in unrepairable:
        self.logger.error(
            'Orphaned computation %s has no known '
            'replica: will not be repaired', comp)

    # Reset the recorded state of every orphaned computation to None.
    self._comps_state.update(dict.fromkeys(orphans))

    # For removal, the agents that must be informed are the ones that
    # possess a replica of one of the orphaned computations.
    for agent in candidates:
        repair_info = _removal_candidate_agt_info(
            agent, leaving_agents, self.graph, self.discovery)
        self.logger.debug('Info for candidate agent %s : %s',
                          agent, repair_info)
        self._send_mgt_msg(agent, SetupRepairMessage(repair_info))
        self._agts_state[agent] = 'repair_setup'
def _agents_removal(self, leaving_agents: List[str]):
    """Record the removal event and start repairing orphaned computations.

    Steps, in order:

    1. Look up the candidate agents and the orphaned computations for
       ``leaving_agents`` through ``self.discovery``.
    2. Append one line to ``events.yaml`` holding the time elapsed since
       ``self.start_time``, ``self.dist_count``, the number of candidate
       agents and the number of orphaned computations.
    3. If nothing is orphaned: dump repair metrics with status ``"OK"``,
       request a resume unless ``self._orchestrator.repair_only`` is set,
       increment ``self.dist_count``, clear ``self.repair_metrics`` and
       return.
    4. Otherwise: set the state of every orphaned computation to ``None``
       and send a ``SetupRepairMessage`` to each candidate agent.

    :param leaving_agents: names of the agents that left the system.
    """
    # NOTE(review): this method was recovered from a source where the
    # indentation was lost; the nesting of the "no orphaned computation"
    # branch below is the most plausible reading -- confirm against the
    # reference implementation.

    # Now inform other agents of the list of agents that left the system.
    # This stands in for a proper discovery mechanism.
    candidates_agents = _removal_candidate_agents(
        leaving_agents, self.discovery)
    orphaned = _removal_orphaned_computations(leaving_agents, self.discovery)

    # Dump stats for this event: the file is opened in append mode, so each
    # removal event adds one comma-separated line.
    f_name = 'events.yaml'
    # Time elapsed between self.start_time and this removal.
    self.removal_time = perf_counter() - self.start_time
    with open(f_name, mode='a', encoding='utf-8') as f:
        f.write(f"{self.removal_time}, {self.dist_count}, {len(candidates_agents)},"
                f" {len(orphaned)}\n")

    if not orphaned:
        # If the departed agent was not hosting any computation, simply
        # resume the system.
        self.logger.info("No orphaned computation, resuming computations ")
        self._dump_repair_metrics("OK", 0)
        # When repair_only is set, no resume request is sent.
        if not self._orchestrator.repair_only:
            self._request_resume()
        self.dist_count += 1
        self.repair_metrics.clear()
        return

    # Map each orphaned computation to the agents returned by
    # discovery.replica_agents() for it.
    orphaned_replicas = {o: self.discovery.replica_agents(o) for o in orphaned}
    self.logger.info('On removal of agents %s, orphaned computations: %s '
                     'with candidates %s', leaving_agents, orphaned_replicas,
                     candidates_agents)
    # Report the computations for which no replica agent is known.
    for o, hs in orphaned_replicas.items():
        if not hs:
            self.logger.error('Orphaned computation %s has no known '
                              'replica: will not be repaired', o)

    # Reset the recorded state of every orphaned computation to None.
    self._comps_state.update({c: None for c in orphaned})

    # For removal, agents that must be informed are agents that possess a
    # replica of one of the orphaned computations.
    for candidate in candidates_agents:
        info = _removal_candidate_agt_info(
            candidate, leaving_agents, self.graph, self.discovery)
        self.logger.debug('Info for candidate agent %s : %s', candidate, info)
        msg = SetupRepairMessage(info)
        self._send_mgt_msg(candidate, msg)
        # Track that this agent has been asked to set up the repair.
        self._agts_state[candidate] = 'repair_setup'