Ejemplo n.º 1
    def step(self, action: Action) -> State:
        """Advance the environment by one step.

        Args:
            action: The action to apply to the task. Must belong to the
                environment's action space.

        Returns:
            State: A (observation, reward, done, info) tuple.
        """
        # Validate action and robot
        assert self.action_space.contains(action), \
            "%r (%s) invalid" % (action, type(action))

        # Set the action
        ok_action = self.task.set_action(action)
        assert ok_action, "Failed to set the action"

        # TODO: realtime step

        # Get the observation
        observation = self.task.get_observation()
        assert self.observation_space.contains(observation), \
            "%r (%s) invalid" % (observation, type(observation))

        # Get the reward
        # TODO: use the wrapper method?
        reward = self.task.get_reward()
        # Compare against None rather than truthiness: a perfectly valid
        # reward of 0 (or 0.0) is falsy and would trip a bare assert.
        assert reward is not None, "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        return State((observation, reward, done, {}))
Ejemplo n.º 2
    def step(self, action: Action) -> State:
        """Advance the environment by stepping the Gazebo simulator once.

        Args:
            action: The action to apply to the task. Out-of-space actions
                only trigger a warning (they are still applied).

        Returns:
            State: A (observation, reward, done, info) tuple.
        """
        if not self.action_space.contains(action):
            # logger.warn is deprecated in the stdlib logging API
            logger.warning("The action does not belong to the action space")

        # Set the action
        ok_action = self.task.set_action(action)
        assert ok_action, "Failed to set the action"

        # Step the simulator
        ok_gazebo = self.gazebo.run()
        assert ok_gazebo, "Failed to step gazebo"

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warning(
                "The observation does not belong to the observation space")

        # Get the reward
        # TODO: use the wrapper method?
        reward = self.task.get_reward()
        # A reward of 0 is valid, so only None signals a failure
        assert reward is not None, "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        return State((observation, reward, done, {}))
Ejemplo n.º 3
    def step(self, action: Action) -> State:
        """Advance the environment by stepping the PyBullet simulator once.

        Args:
            action: The action to apply to the task. Out-of-space actions
                only trigger a warning (they are still applied).

        Returns:
            State: A (observation, reward, done, info) tuple.
        """
        if not self.action_space.contains(action):
            # logger.warn is deprecated in the stdlib logging API
            logger.warning("The action does not belong to the action space")

        # Set the action
        ok_action = self.task.set_action(action)
        assert ok_action, "Failed to set the action"

        # Step the simulator
        self.pybullet.stepSimulation()

        # Get the observation
        observation = self.task.get_observation()

        if not self.observation_space.contains(observation):
            logger.warning(
                "The observation does not belong to the observation space")

        # Get the reward
        reward = self.task.get_reward()
        # A reward of 0 is valid, so only None signals a failure
        assert reward is not None, "Failed to get the reward"

        # Check termination
        done = self.task.is_done()

        # Enforce the real-time factor
        self._enforce_rtf()

        return State((observation, reward, done, {}))
    def step(self, action: np.ndarray) -> State:
        """Propagate the pendulum dynamics forward by one time step.

        The ground-truth model is integrated with a plain explicit Euler
        scheme; the resulting error could be reduced with a higher-order
        integrator.
        """
        # Angular acceleration from the torque balance around the pivot
        applied_torque = action[0]
        gravity_torque = self.m * self.g * self.L / 2.0 * np.sin(self.theta)
        angular_acceleration = (gravity_torque + applied_torque) / self.I

        # Explicit Euler: first the velocity, then the position
        self.theta_dot += angular_acceleration * self.dt
        self.theta += self.theta_dot * self.dt

        observation = np.array(
            [np.cos(self.theta), np.sin(self.theta), self.theta_dot])

        return State((Observation(observation), Reward(0.0), False, {}))
Ejemplo n.º 5
    def step(self, action: Action) -> State:
        """Advance the wrapped gympp environment by one step.

        Bridges a Python action into the SWIG bindings (converting numpy
        types to plain Python first), executes the C++ step, and converts
        the returned gympp::State back into Python/numpy objects.

        Args:
            action: The action to apply. Must belong to the action space.

        Returns:
            State: A (observation, reward, done, info) tuple; info carries
            the raw gympp state info under the 'gympp' key.
        """
        assert self.action_space.contains(action), \
            "The action does not belong to the action space"

        # The bindings do not accept yet numpy types as arguments. We need to covert
        # numpy variables to the closer python type.

        # Check if the input variable is a numpy type
        is_numpy = type(action).__module__ == np.__name__

        if is_numpy:
            if isinstance(action, np.ndarray):
                action = action.tolist()
            elif isinstance(action, np.number):
                # Scalar numpy value -> native Python scalar
                action = action.item()
            else:
                # Unexpected numpy type; fail loudly
                assert False

        # Actions must be std::vector objects, so if the passed action is a scalar
        # we have to store it inside a list object before passing it to the bindings
        if isinstance(action, Number):
            action_list = [action]
        else:
            action_list = list(action)

        # Create the gympp::Sample object
        # NOTE(review): self._act_dt presumably selects the C++ scalar type
        # (e.g. 'Vector_d') — confirm against the bindings' naming scheme.
        action_buffer = getattr(bindings, 'Vector' + self._act_dt)(action_list)
        action_sample = bindings.Sample(action_buffer)

        # Execute the step and get the std::optional<gympp::State> object
        state_optional = self.gympp_env.step(action_sample)
        assert state_optional.has_value()

        # Get the gympp::State
        state = state_optional.value()

        # Get the std::vector buffer of gympp::Observation
        observation_vector = getattr(state.observation, 'getBuffer' + self._obs_dt)()
        assert observation_vector, "Failed to get the observation buffer"
        assert observation_vector.size() > 0, "The observation does not contain elements"

        # Convert the SWIG type to a list
        observation_list = list(observation_vector)

        # Convert the observation to a numpy array (this is the only required copy)
        if isinstance(self.observation_space, gym.spaces.Box):
            observation = np.array(observation_list)
        elif isinstance(self.observation_space, gym.spaces.Discrete):
            # A Discrete observation is a single scalar element
            assert observation_vector.size() == 1, "The buffer has the wrong dimension"
            observation = observation_list[0]
        else:
            assert False, "Space not supported"

        assert self.observation_space.contains(observation), \
            "The returned observation does not belong to the space"

        # Create the info dict
        info = {'gympp': state.info}

        # Return the tuple
        return State((observation, state.reward, state.done, info))