def test_removal_candidate_agents(self):
        """Check the agents reported as candidates when agents are removed.

        Two removal scenarios are exercised in turn against
        ``self.discovery``. The expected agent names depend on that fixture,
        which is prepared outside this method.
        """
        scenarios = (
            # Only a1 is removed.
            (['a1'], {'a2', 'a5'}),
            # a1 and a2 are removed together.
            (['a1', 'a2'], {'a3', 'a5', 'a6'}),
        )
        for removed_agents, expected_agents in scenarios:
            found = _removal_candidate_agents(removed_agents, self.discovery)
            # Compare as sets: the order of the returned agents is irrelevant.
            self.assertSetEqual(set(found), expected_agents)
Example #2
0
    def _agents_removal(self, leaving_agents: List[str]):
        """Start the repair setup once ``leaving_agents`` have left the system.

        Looks up the candidate agents and the orphaned computations for the
        departed agents, marks every orphaned computation with a ``None``
        state, then sends a ``SetupRepairMessage`` to each candidate agent and
        records that agent as being in the ``'repair_setup'`` state.

        :param leaving_agents: names of the agents that left the system.
        """
        # Telling the other agents who left stands in for a proper discovery
        # mechanism.
        candidates = _removal_candidate_agents(leaving_agents, self.discovery)
        orphans = _removal_orphaned_computations(leaving_agents,
                                                 self.discovery)

        # Map each orphaned computation to the agents holding one of its
        # replicas.
        replicas_by_orphan = {}
        for comp in orphans:
            replicas_by_orphan[comp] = self.discovery.replica_agents(comp)

        self.logger.info(
            'On removal of agents %s, orphaned computations: %s '
            'with candidates %s',
            leaving_agents, replicas_by_orphan, candidates)

        # An orphaned computation without any replica can only be reported.
        for comp, hosts in replicas_by_orphan.items():
            if hosts:
                continue
            self.logger.error(
                'Orphaned computation %s has no known '
                'replica: will not be repaired', comp)

        # Reset the recorded state of every orphaned computation to None.
        self._comps_state.update(dict.fromkeys(orphans))

        # For removal, the agents that must be informed are those that possess
        # a replica of one of the orphaned computations.
        for agt in candidates:
            repair_info = _removal_candidate_agt_info(
                agt, leaving_agents, self.graph, self.discovery)
            self.logger.debug('Info for candidate agent %s : %s', agt,
                              repair_info)
            setup_msg = SetupRepairMessage(repair_info)
            self._send_mgt_msg(agt, setup_msg)
            self._agts_state[agt] = 'repair_setup'
Example #3
0
    def _agents_removal(self, leaving_agents: List[str]):
        """Record the removal event and start the repair setup if needed.

        A line of statistics (elapsed time since ``self.start_time``,
        ``self.dist_count``, number of candidate agents, number of orphaned
        computations) is appended to ``events.yaml``. When no computation is
        orphaned, metrics are dumped with an ``"OK"`` status, a resume is
        requested unless the orchestrator is in repair-only mode, and the
        method returns. Otherwise every orphaned computation gets a ``None``
        state and each candidate agent is sent a ``SetupRepairMessage`` and
        recorded as being in the ``'repair_setup'`` state.

        :param leaving_agents: names of the agents that left the system.
        """
        # Telling the other agents who left stands in for a proper discovery
        # mechanism.
        candidates_agents = _removal_candidate_agents(leaving_agents,
                                                      self.discovery)
        orphaned = _removal_orphaned_computations(
            leaving_agents, self.discovery)

        # Append the statistics for this event to the events file.
        stats_path = 'events.yaml'
        self.removal_time = perf_counter() - self.start_time

        with open(stats_path, mode='a', encoding='utf-8') as stats_file:
            stats_file.write(
                f"{self.removal_time}, {self.dist_count}, {len(candidates_agents)},"
                f" {len(orphaned)}\n"
            )

        if not orphaned:
            # The departed agents hosted no computation: nothing to repair,
            # so simply resume the system.
            self.logger.info("No orphaned computation, resuming computations ")
            self._dump_repair_metrics("OK", 0)
            if not self._orchestrator.repair_only:
                self._request_resume()
            self.dist_count += 1
            self.repair_metrics.clear()
            return

        # Map each orphaned computation to the agents holding one of its
        # replicas.
        replicas_by_orphan = {}
        for comp in orphaned:
            replicas_by_orphan[comp] = self.discovery.replica_agents(comp)

        self.logger.info(
            'On removal of agents %s, orphaned computations: %s '
            'with candidates %s',
            leaving_agents, replicas_by_orphan, candidates_agents)

        # An orphaned computation without any replica can only be reported.
        for comp, hosts in replicas_by_orphan.items():
            if hosts:
                continue
            self.logger.error(
                'Orphaned computation %s has no known '
                'replica: will not be repaired', comp)

        # Reset the recorded state of every orphaned computation to None.
        self._comps_state.update({comp: None for comp in orphaned})

        # For removal, the agents that must be informed are those that possess
        # a replica of one of the orphaned computations.
        for agt in candidates_agents:
            repair_info = _removal_candidate_agt_info(
                agt, leaving_agents, self.graph, self.discovery)
            self.logger.debug('Info for candidate agent %s : %s', agt,
                              repair_info)
            setup_msg = SetupRepairMessage(repair_info)
            self._send_mgt_msg(agt, setup_msg)
            self._agts_state[agt] = 'repair_setup'