def step(self, action: Action) -> State:
    """Advance the environment by one step.

    Validates the action, forwards it to the task, gathers the resulting
    observation, reward, and termination flag, and packs them into a State.

    Args:
        action: The action to apply. Must belong to the action space.

    Returns:
        A State tuple ``(observation, reward, done, info)`` with an empty
        info dict.
    """
    # Validate action and robot
    assert self.action_space.contains(action), \
        "%r (%s) invalid" % (action, type(action))

    # Set the action
    ok_action = self.task.set_action(action)
    assert ok_action, "Failed to set the action"

    # TODO: realtime step

    # Get the observation
    observation = self.task.get_observation()
    assert self.observation_space.contains(observation), \
        "%r (%s) invalid" % (observation, type(observation))

    # Get the reward
    # TODO: use the wrapper method?
    reward = self.task.get_reward()
    # BUGFIX: check against None instead of truthiness, otherwise a perfectly
    # valid reward of 0.0 (or an all-zero value) would trip the assertion.
    # This also matches the sibling step() implementations in this project.
    assert reward is not None, "Failed to get the reward"

    # Check termination
    done = self.task.is_done()

    return State((observation, reward, done, {}))
def step(self, action: Action) -> State:
    """Apply ``action``, advance the Gazebo simulation, and return the result.

    Args:
        action: The action to apply for this step.

    Returns:
        A State tuple ``(observation, reward, done, info)`` with an empty
        info dict.
    """
    # Out-of-space actions are tolerated but reported
    if not self.action_space.contains(action):
        logger.warn("The action does not belong to the action space")

    # Forward the action to the task
    action_was_set = self.task.set_action(action)
    assert action_was_set, "Failed to set the action"

    # Advance the simulator by one step
    simulator_stepped = self.gazebo.run()
    assert simulator_stepped, "Failed to step gazebo"

    # Read back the observation; out-of-space observations are only warned about
    observation = self.task.get_observation()
    if not self.observation_space.contains(observation):
        logger.warn(
            "The observation does not belong to the observation space")

    # Get the reward
    # TODO: use the wrapper method?
    reward = self.task.get_reward()
    assert reward is not None, "Failed to get the reward"

    # Query the termination condition
    done = self.task.is_done()

    return State((observation, reward, done, {}))
def step(self, action: Action) -> State:
    """Apply ``action``, step the PyBullet simulation, and return the result.

    Also enforces the configured real-time factor after the physics step.

    Args:
        action: The action to apply for this step.

    Returns:
        A State tuple ``(observation, reward, done, info)`` with an empty
        info dict.
    """
    # Out-of-space actions are tolerated but reported
    if not self.action_space.contains(action):
        logger.warn("The action does not belong to the action space")

    # Forward the action to the task
    action_was_set = self.task.set_action(action)
    assert action_was_set, "Failed to set the action"

    # Advance the physics engine by one step
    self.pybullet.stepSimulation()

    # Read back the observation; out-of-space observations are only warned about
    observation = self.task.get_observation()
    if not self.observation_space.contains(observation):
        logger.warn(
            "The observation does not belong to the observation space")

    # Compute the reward
    reward = self.task.get_reward()
    assert reward is not None, "Failed to get the reward"

    # Query the termination condition
    done = self.task.is_done()

    # Throttle the loop to respect the real-time factor
    self._enforce_rtf()

    return State((observation, reward, done, {}))
def step(self, action: np.ndarray) -> State:
    """Advance the analytic pendulum model by one integration step.

    The dynamics are integrated with a plain forward-Euler scheme, so this
    ground-truth reference carries the corresponding integration error.

    Args:
        action: Array whose first element is the motor torque ``tau_m``.

    Returns:
        A State tuple with observation ``[cos(theta), sin(theta), theta_dot]``,
        a constant reward of 0.0, and ``done`` always False.
    """
    tau_m = action[0]

    # Angular acceleration from the gravity torque plus the motor torque,
    # divided by the moment of inertia (expression order kept for exact
    # floating-point reproducibility).
    theta_ddot = (
        self.m * self.g * self.L / 2.0 * np.sin(self.theta) + tau_m
    ) / self.I

    # Forward-Euler integration.
    # Note that in this way the ground truth used as comparison implements a
    # very basic integration method, and the errors might be reduced
    # implementing a better integrator.
    self.theta_dot += theta_ddot * self.dt
    self.theta += self.theta_dot * self.dt

    pendulum_observation = np.array(
        [np.cos(self.theta), np.sin(self.theta), self.theta_dot])

    return State((Observation(pendulum_observation), Reward(0.0), False, {}))
def step(self, action: Action) -> State:
    """Forward the action to the C++ gympp environment through the SWIG
    bindings and convert the returned gympp::State back to Python types.

    Args:
        action: The action to apply. Must belong to the action space. Numpy
            arrays and scalars are converted to plain Python types first,
            since the bindings do not accept numpy arguments.

    Returns:
        A State tuple ``(observation, reward, done, info)`` where ``info``
        carries the raw gympp info under the ``'gympp'`` key.
    """
    assert self.action_space.contains(action), \
        "The action does not belong to the action space"

    # The bindings do not accept yet numpy types as arguments. We need to
    # convert numpy variables to the closest python type.

    # Check if the input variable is a numpy type
    is_numpy = type(action).__module__ == np.__name__

    if is_numpy:
        if isinstance(action, np.ndarray):
            action = action.tolist()
        elif isinstance(action, np.number):
            action = action.item()
        else:
            # Unexpected numpy type: neither an array nor a scalar
            assert False

    # Actions must be std::vector objects, so if the passed action is a scalar
    # we have to store it inside a list object before passing it to the bindings
    if isinstance(action, Number):
        action_list = [action]
    else:
        action_list = list(action)

    # Create the gympp::Sample object.
    # self._act_dt selects the typed SWIG vector class (e.g. 'Vector' + dtype
    # suffix) matching the action buffer's element type.
    action_buffer = getattr(bindings, 'Vector' + self._act_dt)(action_list)
    action_sample = bindings.Sample(action_buffer)

    # Execute the step and get the std::optional<gympp::State> object
    state_optional = self.gympp_env.step(action_sample)
    assert state_optional.has_value()

    # Get the gympp::State
    state = state_optional.value()

    # Get the std::vector buffer of gympp::Observation.
    # self._obs_dt selects the typed getter (e.g. 'getBuffer' + dtype suffix).
    observation_vector = getattr(state.observation, 'getBuffer' + self._obs_dt)()
    assert observation_vector, "Failed to get the observation buffer"
    assert observation_vector.size() > 0, "The observation does not contain elements"

    # Convert the SWIG type to a list
    observation_list = list(observation_vector)

    # Convert the observation to a numpy array (this is the only required copy)
    if isinstance(self.observation_space, gym.spaces.Box):
        observation = np.array(observation_list)
    elif isinstance(self.observation_space, gym.spaces.Discrete):
        # Discrete spaces carry a single scalar value
        assert observation_vector.size() == 1, "The buffer has the wrong dimension"
        observation = observation_list[0]
    else:
        assert False, "Space not supported"

    assert self.observation_space.contains(observation), \
        "The returned observation does not belong to the space"

    # Create the info dict, exposing the raw gympp info to the caller
    info = {'gympp': state.info}

    # Return the tuple
    return State((observation, state.reward, state.done, info))