Exemple #1
0
    def step(self, action: List[Any]) -> GymStepResult:
        """Apply ``action`` to the wrapped environment and advance it one step.

        If ``self._flattener`` is set, ``action`` is first translated through
        ``lookup_action``. The action is then reshaped to
        ``(1, self.action_size)`` and submitted as a continuous or a discrete
        action, depending on ``self.group_spec.action_spec.is_continuous()``.

        When the step produces at least one terminal step, ``self.game_over``
        is set to ``True`` and the result is built from the terminal steps;
        otherwise it is built from the decision steps. Once the episode has
        ended, the caller is responsible for calling `reset()`.

        Args:
            action (object/list): the action chosen by the agent for this step
        Returns:
            The value of ``self._single_step(...)``; per the wrapper contract
            a tuple (observation, reward, done, info):
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information.
        """
        flattener = self._flattener
        if flattener is not None:
            # Map the flattened action back to its list form.
            action = flattener.lookup_action(action)

        # One row only: a single action is sent per step.
        batched_action = np.array(action).reshape((1, self.action_size))

        env_action = ActionTuple()
        if self.group_spec.action_spec.is_continuous():
            env_action.add_continuous(batched_action)
        else:
            env_action.add_discrete(batched_action)

        env = self._env
        env.set_actions(self.name, env_action)
        env.step()

        decisions, terminals = env.get_steps(self.name)
        self._check_agents(max(len(decisions), len(terminals)))

        if len(terminals) == 0:
            return self._single_step(decisions)

        # Any terminal step means the agent has finished its episode.
        self.game_over = True
        return self._single_step(terminals)
Exemple #2
0
    def get_action(
        self, decision_requests: DecisionSteps, worker_id: int = 0
    ) -> ActionInfo:
        """
        Evaluates the policy on the given decision requests and packages the result.
        :param decision_requests: The DecisionSteps from the environment for the agents
            that need an action.
        :param worker_id: In parallel environment training, the unique id of the environment worker that
            the DecisionSteps came from. Used to construct a globally unique id for each agent.
        :return: an ActionInfo holding the action, the environment action, the value
            and the full (post-processed) evaluation output; an empty ActionInfo when
            there are no decision requests.
        """
        if len(decision_requests) == 0:
            return ActionInfo.empty()

        # agent_id is iterated element by element; for a 1-D array that
        # iteration order is the agent order.
        global_agent_ids = [
            get_global_agent_id(worker_id, int(local_id))
            for local_id in decision_requests.agent_id
        ]

        run_out = self.evaluate(  # pylint: disable=assignment-from-no-return
            decision_requests, global_agent_ids
        )
        self.save_memories(global_agent_ids, run_out.get("memory_out"))

        # The buffer expects tuple wrappers (hybrid action support), so the
        # raw evaluation outputs are wrapped in place below.
        if "log_probs" in run_out:
            wrapped_log_probs = LogProbsTuple()
            if not self.behavior_spec.action_spec.is_continuous():
                wrapped_log_probs.add_discrete(run_out["log_probs"])
            else:
                wrapped_log_probs.add_continuous(run_out["log_probs"])
            run_out["log_probs"] = wrapped_log_probs

        if "action" in run_out:
            stored_action = ActionTuple()
            env_action = ActionTuple()
            if not self.behavior_spec.action_spec.is_continuous():
                stored_action.add_discrete(run_out["action"])
                env_action.add_discrete(run_out["action"])
            else:
                # Continuous case: "pre_action" goes into the stored action,
                # while "action" is what gets sent to the environment.
                stored_action.add_continuous(run_out["pre_action"])
                env_action.add_continuous(run_out["action"])
            run_out["action"] = stored_action
            run_out["env_action"] = env_action

        final_action = run_out.get("action")
        self.check_nan_action(final_action)
        return ActionInfo(
            action=final_action,
            env_action=run_out.get("env_action"),
            value=run_out.get("value"),
            outputs=run_out,
            agent_ids=decision_requests.agent_id,
        )
Exemple #3
0
 def to_action_tuple(self, clip: bool = False) -> ActionTuple:
     """
     Builds an ActionTuple from the stored action tensors.

     The continuous part is added when ``self.continuous_tensor`` is not
     None, the discrete part when ``self.discrete_list`` is not None.

     :param clip: When True, continuous values are clamped to [-3, 3] and
         then divided by 3, so they end up in [-1, 1].
     :return: An ActionTuple holding the numpy versions of the actions.
     """
     result = ActionTuple()

     continuous = self.continuous_tensor
     if continuous is not None:
         if clip:
             continuous = torch.clamp(continuous, -3, 3) / 3
         result.add_continuous(ModelUtils.to_numpy(continuous))

     if self.discrete_list is not None:
         # Presence is checked on discrete_list but the data is read from
         # discrete_tensor (index 0 along its second axis) -- presumably
         # discrete_tensor is derived from discrete_list; confirm.
         result.add_discrete(ModelUtils.to_numpy(self.discrete_tensor[:, 0, :]))

     return result
 def set_actions(self, behavior_name, action):
     """
     Distributes a batched action over the per-agent environments.

     ``action`` holds one row per agent that is not yet done. For each
     agent index ``i`` in ``range(self.num_agents)``, the environment stored
     under ``behavior_name + str(i)`` receives a one-row ActionTuple, unless
     that agent is flagged in ``self.dones``.

     :param behavior_name: Behavior name; also the key prefix into
         ``self.envs`` / ``self.dones`` and the key written in ``env.action``.
     :param action: Object exposing ``continuous`` and ``discrete`` arrays
         with one row per not-yet-done agent.
     """
     # Row cursor into ``action``: it only advances for agents that are not
     # done, because done agents have no row in the batch.
     live_row = 0
     for agent_num in range(self.num_agents):
         single_action = ActionTuple()
         env_key = behavior_name + str(agent_num)
         target_env = self.envs[env_key]
         if self.dones[env_key]:
             continue

         if self.action_spec.continuous_size > 0:
             single_action.add_continuous(
                 action.continuous[live_row:live_row + 1]
             )
         if self.action_spec.discrete_size > 0:
             discrete_row = action.discrete[live_row, :]
             # Wrap the row in a list so the array keeps a leading batch axis.
             single_action.add_discrete(np.array([discrete_row]))

         live_row += 1
         target_env.action[behavior_name] = single_action
Exemple #5
0
def test_set_action_multi_agent():
    """Smoke-test setting a continuous action for every requesting agent.

    Builds the registry environment identified by ``BALL_ID``, then for three
    resets runs 50 steps, each time sending an all-ones continuous action
    with one row per agent currently in the decision steps. The test passes
    if no call raises.

    The environment is closed in a ``finally`` block so that a failure in the
    middle of the test still closes it instead of leaking it into later tests.
    """
    engine_config_channel = EngineConfigurationChannel()
    env = default_registry[BALL_ID].make(
        base_port=6001,
        worker_id=0,
        no_graphics=True,
        side_channels=[engine_config_channel],
    )
    try:
        engine_config_channel.set_configuration_parameters(time_scale=100)
        for _episode in range(3):
            env.reset()
            behavior_name = list(env.behavior_specs.keys())[0]
            decision_steps, _terminal_steps = env.get_steps(behavior_name)
            for _step in range(50):
                # One row per agent awaiting a decision. The width 2 is
                # presumably the continuous action size of this behavior --
                # confirm against its action spec.
                action = np.ones((len(decision_steps), 2))
                action_tuple = ActionTuple()
                action_tuple.add_continuous(action)
                env.set_actions(behavior_name, action_tuple)
                env.step()
                decision_steps, _terminal_steps = env.get_steps(behavior_name)
    finally:
        # Always close the environment, even when a step above fails.
        env.close()
Exemple #6
0
    def step(self, action: List[Any]) -> GymStepResult:
        """Apply ``action`` to the wrapped environment and advance it one step.

        If ``self._flattener`` is set, ``action`` is first translated through
        ``lookup_action``. The action is then reshaped to
        ``(-1, self.action_size)`` (as many rows as the data provides) and
        submitted as a continuous or a discrete action, depending on
        ``self.group_spec.action_spec.is_continuous()``.

        The result is whatever ``self.combine_steps`` builds from the decision
        and terminal steps. If ``combine_steps`` raises ``KeyError``,
        ``self.key_error_counter`` is incremented and ``self.last_stepreturn``
        is returned instead. Once an episode has ended, the caller is
        responsible for calling `reset()`.

        Args:
            action (object/list): the action(s) chosen for this step
        Returns:
            Per the wrapper contract a tuple (observation, reward, done, info):
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information.
        """
        if self._flattener is not None:
            # Map the flattened action back to its list form.
            action = self._flattener.lookup_action(action)

        batched_action = np.array(action).reshape((-1, self.action_size))

        env_action = ActionTuple()
        add_action = (
            env_action.add_continuous
            if self.group_spec.action_spec.is_continuous()
            else env_action.add_discrete
        )
        add_action(batched_action)

        self._env.set_actions(self.name, env_action)
        self._env.step()

        decisions, terminals = self._env.get_steps(self.name)

        try:
            result = self.combine_steps(decisions, terminals)
        except KeyError:
            # Best-effort fallback: count the failure and hand back the stored
            # step result rather than propagating the error.
            # NOTE(review): last_stepreturn is not assigned in this method --
            # confirm it is always set before the first KeyError can occur.
            self.key_error_counter += 1
            result = self.last_stepreturn
        return result