def day(self, actions, rewards):
    """
    Resolve the day phase.

    While ``self.is_comm`` is set the call only logs the voting round and hands
    the rewards back untouched. Otherwise the 'day' penalty is added to every
    entry of ``rewards`` and the vote is resolved by the nested ``execution``
    helper.

    :param actions: dict, map id_ to vote
    :param rewards: dict, maps agent id_ to curr reward
    :return: updated rewards
    """

    def should_log():
        # debug lines are emitted only when the configured log step matches
        # the current episode step
        return Params.log_step == self.ep_step

    def execution(votes, tally):
        """
        Resolve the vote of an execution phase.

        :param votes: the actions handed to ``day``
        :param tally: rewards to update
        :return: updated rewards
        """
        self.custom_metrics["suicide"] += suicide_num(votes)

        # the agent to be executed
        target = most_frequent(votes)

        # target_accord is defined elsewhere; per the original comment it
        # penalizes agents whose vote diverges from the chosen target
        tally = self.target_accord(target, tally, votes)

        if not self.status_map[target]:
            # the chosen agent is already dead: every living agent pays for it
            for id_ in self.get_ids("all", alive=True):
                tally[id_] += self.penalties.get('execute_dead')

            if should_log():
                logger.debug(f"Players tried to execute dead agent {target}")
        else:
            if should_log():
                logger.debug(f"Player {target} ({self.role_map[target]}) has been executed")

            # the target is still marked alive here, so it is part of the loop
            for id_ in self.get_ids('all', alive=True):
                penalty_key = "death" if id_ == target else "execution"
                tally[id_] += self.penalties.get(penalty_key)

            # only now mark the target as dead
            self.status_map[target] = 0

        # one more day has gone by
        self.day_count += 1

        return tally

    # communication phase: nothing to resolve yet
    if self.is_comm:
        if should_log():
            logger.debug("Day Time| Voting")
        return rewards

    if should_log():
        logger.debug("Day Time| Executing")

    # everybody pays the 'day' penalty before the execution is resolved
    rewards = {id_: val + self.penalties.get('day') for id_, val in rewards.items()}
    return execution(actions, rewards)
def execution(actions, rewards):
    """
    Resolve the vote of an execution phase.

    Relies on ``self`` from the enclosing scope. Updates the suicide metric,
    picks the most voted agent, lets ``self.target_accord`` adjust the rewards
    and then either executes the target or penalizes the living agents for
    having chosen a dead one. The day counter is increased in both cases.

    :param actions: votes of the agents
    :param rewards: rewards to update
    :return: updated rewards
    """
    self.custom_metrics["suicide"] += suicide_num(actions)

    # the agent to be executed
    target = most_frequent(actions)

    # target_accord is defined elsewhere; per the original comment it
    # penalizes agents whose vote diverges from the chosen target
    rewards = self.target_accord(
        target, rewards, self.get_ids("all", alive=True)
    )

    if not self.status_map[target]:
        # the chosen agent is already dead: every living agent pays for it
        for id_ in self.get_ids("all", alive=True):
            rewards[id_] += self.penalties.get('execute_dead')

        logger.debug(f"Players tried to execute dead agent {target}")

        # keep track of the wasted execution
        self.custom_metrics["dead_man_execution"] += 1
    else:
        logger.debug(
            f"Player {target} ({self.role_map[target]}) has been executed"
        )

        # snapshot taken while the target is still marked alive, so the
        # target itself is part of it
        living = [agent for agent in rewards.keys() if self.status_map[agent]]
        for id_ in living:
            penalty_key = "death" if id_ == target else "execution"
            rewards[id_] += self.penalties.get(penalty_key)

        # only now mark the target as dead
        self.status_map[target] = 0

    # one more day has gone by
    self.day_count += 1

    return rewards
def step(self, actions):
    """Run one timestep of the environment's dynamics.

    Depending on ``self.is_night`` either the night or the day phase is run,
    then the phase flag is flipped. When end of episode is reached, you are
    responsible for calling `reset()` to reset this environment's state.

    Args:
        actions (dict): actions provided by the agents

    Returns:
        observation (object): agent's observation of the current environment
        reward (float) : amount of reward returned after previous action
        done (bool): whether the episode has ended, in which case further
            step() calls will return undefined results
        info (dict): contains auxiliary diagnostic information (helpful for
            debugging, and sometimes learning)
    """
    # every agent, dead or alive, starts the step with a zero reward
    rewards = dict.fromkeys(self.get_ids("all", alive=False), 0)

    if self.is_night:
        logger.debug("Night Time")
        rewards = self.night(actions, rewards)
    else:
        logger.debug("Day Time")
        # penalize since a day has passed
        rewards = {
            agent: val + self.penalties.get('day')
            for agent, val in rewards.items()
        }
        rewards = self.day(actions, rewards)

    # switch phase for the next step
    self.is_night = not self.is_night

    # update dones
    dones, rewards = self.check_done(rewards)
    obs = self.observe()

    # every agent receives the same shared infos object
    info = {agent: self.infos for agent in self.get_ids("all", alive=False)}

    obs, rewards, dones, info = self.convert(obs, rewards, dones, info)

    # flag the end of the game for all agents at once
    game_over = bool(self.is_done)
    if game_over:
        self.infos["tot_days"] = self.day_count
    dones["__all__"] = game_over

    return obs, rewards, dones, info
def night(self, actions, rewards):
    """
    Run the night phase.

    Logs which night sub-phase is running (voting while ``self.is_comm`` is
    set, eating otherwise) and delegates to ``self.wolf_action``. This method
    does no filtering of the votes itself; they are forwarded as received.

    :param actions: dict, map id_ to vote
    :param rewards: dict, maps agent id_ to curr reward
    :return: return updated rewards
    """
    phase_message = "Night Time| Voting" if self.is_comm else "Night Time| Eating"
    logger.debug(phase_message)

    # NOTE(review): reconstructed as running in both sub-phases; the flattened
    # original does not show whether this call belonged to the else branch
    # only -- confirm against the upstream file
    return self.wolf_action(actions, rewards)
def kill(actions, rewards):
    """
    Resolve the wolves' vote and kill the chosen agent.

    Relies on ``self`` and ``wolves_ids`` from the enclosing scope.

    :param actions: votes to resolve
    :param rewards: rewards to update
    :return: updated rewards
    :raises Exception: when ``wolves_ids`` is empty
    """

    def charge_wolves(penalty_key):
        # apply the same penalty/reward entry to every wolf in wolves_ids
        for wolf in wolves_ids:
            rewards[wolf] += self.penalties.get(penalty_key)

    # upvote suicide info
    self.custom_metrics["suicide"] += suicide_num(actions)

    if len(wolves_ids) == 0:
        raise Exception(
            "Game not done but wolves are dead, have reset been called?"
        )

    # the agent to be eaten
    target = most_frequent(actions)

    # target_accord is defined elsewhere; per the original comment it
    # penalizes votes that differ from the chosen target
    rewards = self.target_accord(target, rewards, wolves_ids)

    if not self.status_map[target]:
        logger.debug(f"Wolves tried to kill dead agent {target}")
        # eating an already dead player is penalized
        charge_wolves('execute_dead')
        self.custom_metrics["dead_man_kill"] += 1
    else:
        # the victim dies and pays for it, the wolves are rewarded
        self.status_map[target] = 0
        rewards[target] += self.penalties.get("death")
        charge_wolves("kill")
        logger.debug(
            f"Wolves killed {target} ({self.role_map[target]})")

    # eating one of their own kind costs the wolves extra, dead or alive
    if target in wolves_ids:
        charge_wolves('kill_wolf')
        self.custom_metrics["cannibalism"] += 1

    return rewards
def wolf_action(self, actions, rewards):
    """
    Perform wolf action, that is kill agent based on votes and reward.

    Only the votes cast by living wolves are taken into account.

    :param actions: dict, map id to vote
    :param rewards: dict, maps agent id to curr reward
    :return: updated rewards
    :raises Exception: when no living wolf is left
    """

    def charge_wolves(penalty_key):
        # apply the same penalty/reward entry to every living wolf
        for wolf in wolves_ids:
            rewards[wolf] += self.penalties.get(penalty_key)

    # living wolves
    wolves_ids = self.get_ids(ww, alive=True)

    # keep the wolves' votes only
    wolf_votes = {
        voter: vote for voter, vote in actions.items() if voter in wolves_ids
    }

    # upvote suicide info
    self.infos["suicide"] += suicide_num(wolf_votes)

    if len(wolves_ids) == 0:
        raise Exception("Game not done but wolves are dead")

    # one ballot per living wolf, in the order of wolves_ids
    ballots = [wolf_votes[wolf] for wolf in wolves_ids]
    logger.debug(f"wolves votes :{ballots}")

    # the agent to be eaten
    target = most_frequent(ballots)

    if not self.status_map[target]:
        logger.debug(f"Wolves tried to kill dead agent {target}")
        # eating an already dead player is penalized
        charge_wolves('execute_dead')
        self.infos["dead_man_kill"] += 1
    else:
        # the victim dies and pays for it, the wolves are rewarded
        self.status_map[target] = 0
        rewards[target] += self.penalties.get("death")
        charge_wolves("kill")
        logger.debug(f"Wolves killed {target} ({self.role_map[target]})")

    # eating one of their own kind costs the wolves extra, dead or alive
    if target in wolves_ids:
        charge_wolves('kill_wolf')
        self.infos["cannibalism"] += 1

    return rewards
def day(self, actions, rewards):
    """
    Run the day phase, that is execute target based on votes and reward accordingly.

    The vote list is refreshed, the most voted agent is chosen and, when it is
    still alive, executed: the target receives the "death" penalty and every
    other living agent the "execution" one. Voting for an already dead agent
    costs every living agent the 'execute_dead' penalty instead.

    :param actions: dict, map id to vote
    :param rewards: dict, maps agent id to curr reward
    :return: updated rewards
    """
    # update vote list; -1 marks players with no entry in actions
    # (dead agents, per the original comment)
    for idx in range(self.num_players):
        self.votes[idx] = actions.get(idx, -1)

    self.infos["suicide"] += suicide_num(actions)

    # get the agent to be executed
    target = most_frequent(actions)
    logger.debug(f"Villagers votes {list(actions.values())}")

    if self.status_map[target]:
        logger.debug(f"Player {target} ({self.role_map[target]}) has been executed")

        # Reward while the target still counts as alive. Marking it dead before
        # this loop filtered it out of the iteration, so the `id_ == target`
        # branch was unreachable and the "death" penalty was never applied.
        for id_ in [elem for elem in rewards if self.status_map[elem]]:
            if id_ == target:
                rewards[id_] += self.penalties.get("death")
            else:
                rewards[id_] += self.penalties.get("execution")

        # kill target
        self.status_map[target] = 0
    else:
        # penalize agents for executing a dead one
        for id_ in self.get_ids("all", alive=True):
            rewards[id_] += self.penalties.get('execute_dead')
        logger.debug(f"Players tried to execute dead agent {target}")
        # increase the number of dead_man_execution in info
        self.infos["dead_man_execution"] += 1

    # update day
    self.day_count += 1

    return rewards