예제 #1
0
 def actions(self, states, agent_indices):
     """Pick one action for every distribution the policy returns.

     Args:
         states: Forwarded unchanged to ``self.policy.multi_state_policy``.
         agent_indices: Forwarded unchanged to
             ``self.policy.multi_state_policy``.

     Returns:
         A list with one ``(action, info)`` tuple per item yielded by the
         policy call, in the same order. ``action`` is the result of
         ``Action.sample`` on that item and ``info`` is
         ``{"action_probs": item}``.
     """
     per_state_probs = self.policy.multi_state_policy(states, agent_indices)
     # Items are sampled in iteration order, one Action.sample call each.
     return [
         (Action.sample(probs), {"action_probs": probs})
         for probs in per_state_probs
     ]
예제 #2
0
 def action(self, state):
     """Sample an action from an equal-weight distribution over legal actions.

     The legal set is ``Action.MOTION_ACTIONS``, extended with
     ``Action.INTERACT`` when ``self.interact`` is truthy. Every legal
     action gets probability ``1 / number_of_legal_actions``; all other
     entries of the length-``Action.NUM_ACTIONS`` vector stay at zero.

     Args:
         state: Unused by this method.

     Returns:
         A tuple ``(action, info)`` where ``action`` is the result of
         ``Action.sample`` on the probability vector and ``info`` is
         ``{"action_probs": <that vector>}``.
     """
     if self.interact:
         candidates = [*Action.MOTION_ACTIONS, Action.INTERACT]
     else:
         candidates = list(Action.MOTION_ACTIONS)

     # Positions of the legal actions inside the probability vector.
     slots = np.array([Action.ACTION_TO_INDEX[act] for act in candidates])

     probs = np.zeros(Action.NUM_ACTIONS)
     probs[slots] = 1 / len(slots)

     chosen = Action.sample(probs)
     return chosen, {"action_probs": probs}
예제 #3
0
    def action(self, state):
        """Sample a random action, optionally with a custom chance of waiting.

        The base distribution gives equal weight to every action in
        ``Action.ALL_ACTIONS`` when ``self.all_actions`` is truthy, and to
        every action in ``Action.MOTION_ACTIONS`` otherwise.

        When ``self.custom_wait_prob`` is not ``None``, a single
        ``np.random.random()`` draw is compared against it. If the draw is
        smaller, ``Action.STAY`` is returned directly, with the probabilities
        produced by ``Agent.a_probs_from_action``. Otherwise the index of
        ``Action.STAY`` is handed to ``Action.remove_indices_and_renormalize``
        and the action is sampled from the resulting vector.

        Args:
            state: Unused by this method.

        Returns:
            A tuple ``(action, info)`` where ``info`` is
            ``{"action_probs": <vector associated with the choice>}``.
        """
        pool = Action.ALL_ACTIONS if self.all_actions else list(Action.MOTION_ACTIONS)
        slots = np.array([Action.ACTION_TO_INDEX[act] for act in pool])
        probs = np.zeros(Action.NUM_ACTIONS)
        probs[slots] = 1 / len(slots)

        # No custom wait probability: sample straight from the base vector.
        if self.custom_wait_prob is None:
            return Action.sample(probs), {"action_probs": probs}

        wait = Action.STAY
        if np.random.random() < self.custom_wait_prob:
            return wait, {"action_probs": Agent.a_probs_from_action(wait)}

        # The wait branch was not taken, so exclude STAY before sampling.
        probs = Action.remove_indices_and_renormalize(probs, [Action.ACTION_TO_INDEX[wait]])
        return Action.sample(probs), {"action_probs": probs}
예제 #4
0
 def action(self, state):
     """Sample an action from the mean of the sub-agents' distributions.

     Each agent in ``self.agents`` is asked for its action on ``state``.
     Only the ``"action_probs"`` entry of the second element of each
     result is used. These vectors are summed and divided by
     ``len(self.agents)``; an empty agent list is not guarded against.

     Args:
         state: Passed to every sub-agent's ``action`` method.

     Returns:
         A tuple ``(action, info)`` where ``action`` is the result of
         ``Action.sample`` on the averaged vector and ``info`` is
         ``{"action_probs": <averaged vector>}``.
     """
     total = np.zeros(Action.NUM_ACTIONS)
     for member in self.agents:
         member_info = member.action(state)[1]
         total += member_info["action_probs"]

     mean_probs = total / len(self.agents)
     chosen = Action.sample(mean_probs)
     return chosen, {"action_probs": mean_probs}