def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - action - observation - reward - done (status) """ self.iterator += 1 # Execute "action" self._pub.publish( ut_mara.getTrajectoryMessage(action[:self.numJoints], self.environment['jointOrder'], self.velocity)) # Take an observation obs = self.take_observation() # Fetch the positions of the end-effector which are nr_dof:nr_dof+3 rewardDist = ut_math.rmseFunc(obs[self.numJoints:(self.numJoints + 3)]) rewardOrientation = 2 * np.arccos(abs(obs[self.numJoints + 3])) reward = ut_math.computeReward(rewardDist, rewardOrientation, False) # Calculate if the env has been solved done = bool(self.iterator == self.max_episode_steps) # Return the corresponding observations, rewards, etc. return obs, reward, done, {}
def step(self, action): """ Implement the environment step abstraction. Execute action and returns: - action - observation - reward - done (status) """ self.iterator+=1 # Execute "action" self._pub.publish(ut_mara.getTrajectoryMessage( action[:self.numJoints], self.environment['jointOrder'], self.velocity)) self.ros_clock = rclpy.clock.Clock().now().nanoseconds # Take an observation obs = self.take_observation() # Fetch the positions of the end-effector which are nr_dof:nr_dof+3 rewardDist = ut_math.rmseFunc( obs[self.numJoints:(self.numJoints+3)] ) collided = self.collision() reward = ut_math.computeReward(rewardDist) # Calculate if the env has been solved done = bool(self.iterator == self.max_episode_steps) self.buffer_dist_rewards.append(rewardDist) self.buffer_tot_rewards.append(reward) info = {} if self.iterator % self.max_episode_steps == 0: max_dist_tgt = max(self.buffer_dist_rewards) mean_dist_tgt = np.mean(self.buffer_dist_rewards) std_dist_tgt = np.std(self.buffer_dist_rewards) min_dist_tgt = min(self.buffer_dist_rewards) skew_dist_tgt = skew(self.buffer_dist_rewards) max_tot_rew = max(self.buffer_tot_rewards) mean_tot_rew = np.mean(self.buffer_tot_rewards) std_tot_rew = np.std(self.buffer_tot_rewards) min_tot_rew = min(self.buffer_tot_rewards) skew_tot_rew = skew(self.buffer_tot_rewards) num_coll = self.collided info = {"infos":{"ep_dist_max": max_dist_tgt,"ep_dist_mean": mean_dist_tgt,"ep_dist_min": min_dist_tgt,\ "ep_rew_max": max_tot_rew,"ep_rew_mean": mean_tot_rew,"ep_rew_min": min_tot_rew,"num_coll": num_coll,\ "ep_dist_skew": skew_dist_tgt,"ep_dist_std": std_dist_tgt, "ep_rew_std": std_tot_rew, "ep_rew_skew":skew_tot_rew,\ "target_x": self.targetPosition[0],"target_y": self.targetPosition[1],"target_z": self.targetPosition[2]}} self.buffer_dist_rewards = [] self.buffer_tot_rewards = [] self.collided = 0 # Return the corresponding observations, rewards, etc. return obs, reward, done, info