def _step(self, a):
    state, reward, done, meta = CameraRobotEnv._step(self, a)
    if self.flag_timeout <= 0:
        self._flag_reposition()
    self.flag_timeout -= 1
    return state, reward, done, meta
def _step(self, action):
    """Step forward the simulation, given the action.

    Args:
      action: A list of desired motor angles for eight motors.

    Returns:
      observations: The angles, velocities and torques of all motors.
      reward: The reward for the current state-action pair.
      done: Whether the episode has ended.
      info: A dictionary that stores diagnostic information.

    Raises:
      ValueError: The action dimension is not the same as the number of motors.
      ValueError: The magnitude of actions is out of bounds.
    """
    action = self._transform_action_to_motor_command(action)
    # Track the largest command seen so far for each motor (debugging aid).
    for i in range(len(self.Amax)):
        if action[i] > self.Amax[i]:
            self.Amax[i] = action[i]
    # Apply the same motor command for action_repeat simulation steps.
    for _ in range(self.action_repeat):
        state = CameraRobotEnv._step(self, action)
    return state
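# A minimal usage sketch for the step interface documented above, assuming a
# hypothetical Gym-style wrapper `env` that exposes this `_step` through the
# standard `step()`/`reset()` methods and an 8-dimensional action space. The
# names `env`, `rollout` and `num_steps` are illustrative, not part of the
# original environment code.
def rollout(env, num_steps=100):
    """Run a short random rollout and accumulate reward."""
    obs = env.reset()
    total_reward = 0.0
    for _ in range(num_steps):
        action = env.action_space.sample()          # random motor angles
        obs, reward, done, info = env.step(action)  # delegates to _step
        total_reward += reward
        if done:
            obs = env.reset()
    return total_reward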
def _step(self, a):
    state, reward, done, meta = CameraRobotEnv._step(self, a)
    # Reposition the target flag when its countdown expires, or early once the
    # robot is within 0.4 m of the current target; reaching the target also
    # advances the waypoint marker.
    if self.flag_timeout <= 0 or (self.flag_timeout < 225 and self.robot.walk_target_dist < 0.4):
        if self.robot.walk_target_dist < 0.4:
            self.waypoint = 1
        self._flag_reposition()
    self.flag_timeout -= 1

    # Use the mean depth of a central 32x32 crop of the depth image as a proxy
    # for the distance to the nearest obstacle ahead of the robot.
    depth_size = 16
    if "depth" in self.config["output"]:
        depth_obs = self.get_observations()["depth"]
        x_start = int(self.windowsz / 2 - depth_size)
        x_end = int(self.windowsz / 2 + depth_size)
        y_start = int(self.windowsz / 2 - depth_size)
        y_end = int(self.windowsz / 2 + depth_size)
        self.obstacle_dist = np.mean(depth_obs[x_start:x_end, y_start:y_end, -1])

    debug = 0
    if debug:
        print("Frame: {}, FlagTimeOut: {}, Reward: {:.3f}, Distance: {:.3f}, "
              .format(self.nframe, self.flag_timeout, reward, self.robot.walk_target_dist), done)
    return state, reward, done, meta
def _step(self, a):
    state, reward, done, meta = CameraRobotEnv._step(self, a)
    if self.flag_timeout <= 0 or (self.flag_timeout < 225 and self.robot.walk_target_dist < 0.8):
        self._flag_reposition()
    self.flag_timeout -= 1
    # Mean of the last channel over the central 32x32 crop of the observation,
    # used as an estimate of the distance to the closest obstacle.
    self.obstacle_dist = np.mean(state[16:48, 16:48, -1])
    return state, reward, done, meta
def _step(self, a):
    state, reward, done, meta = CameraRobotEnv._step(self, a)
    if self.flag_timeout <= 0 or (self.flag_timeout < 225 and self.robot.walk_target_dist < 0.8):
        self._flag_reposition()
    self.flag_timeout -= 1
    if "depth" in self.config["output"]:
        depth_obs = self.get_observations()["depth"]
        x_start = int(self.windowsz / 2 - 16)
        x_end = int(self.windowsz / 2 + 16)
        y_start = int(self.windowsz / 2 - 16)
        y_end = int(self.windowsz / 2 + 16)
        self.obstacle_dist = np.mean(depth_obs[x_start:x_end, y_start:y_end, -1])
    return state, reward, done, meta
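# Standalone sketch of the obstacle-distance heuristic shared by the _step
# variants above: the mean of the last channel over a centred square crop of
# the depth frame. `center_crop_mean_depth` is a hypothetical helper written
# for illustration (it assumes a square frame of side `window_size`); it is
# not part of the original environment code.
import numpy as np

def center_crop_mean_depth(depth_obs, window_size, half=16):
    """Return the mean depth over the centred (2*half x 2*half) crop."""
    start = int(window_size / 2 - half)
    end = int(window_size / 2 + half)
    return float(np.mean(depth_obs[start:end, start:end, -1]))

# Example: for a 128x128x1 depth image this averages the central 32x32 patch.
# dist = center_crop_mean_depth(np.random.rand(128, 128, 1), window_size=128)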