def updateAction(self, action): import numpy as np """ action[0] == hlc action action[1] == llc action """ self._hlc_timestep = self._hlc_timestep + 1 if ("dont_do_hrl_logic" in self._game_settings and (self._game_settings["dont_do_hrl_logic"] == True)): self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds) else: self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds) ### apply delta position change. action_ = np.array([action[1][0], action[1][1], 0]) agentVel = np.array(p.getBaseVelocity(self._agent)[0]) # print ("action_: ", action_, " agentVel: ", agentVel) action_ = agentVel + action_ action_ = clampValue(action_, self._vel_bounds) if ("use_hlc_action_directly" in self._game_settings and (self._game_settings["use_hlc_action_directly"] == True)): action_ = self._llc_target # print ("New action: ", action_) p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0])
def updateAction(self, action): import numpy as np """ action[0] == hlc action action[1] == llc action """ self._hlc_timestep = self._hlc_timestep + 1 if (self._hlc_timestep >= self._hlc_skip and (self._ran < 0.5)): # print ("Updating llc target from HLC") self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds) ### Need to store this target in the sim as a gobal location to allow for computing local distance state. pos = np.array(p.getBasePositionAndOrientation(self._agent)[0]) # self._llc_target = self._llc_target + action_ self._hlc_timestep = 0 ### Update llc action llc_obs = self.getObservation()[1] ### crazy hack to get proper state size... # llc_obs = np.concatenate([llc_obs,[0,0,0,0,0,0]]) action[1] = self._llc.predict([llc_obs]) # action[1] = [0.03, -0.023] # print ("self._llc_target: ", self._llc_target) ### apply delta position change. action_ = np.array([action[1][0], action[1][1], 0]) agentVel = np.array(p.getBaseVelocity(self._agent)[0]) action_ = agentVel + action_ action_ = clampValue(action_, self._vel_bounds) if ("use_hlc_action_directly" in self._game_settings and (self._game_settings["use_hlc_action_directly"] == True)): action_ = self._llc_target # print ("New action: ", action) p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0])
def updateAction(self, action):
    import numpy as np
    ### Apply the action as a delta on the agent's current velocity.
    action = np.array([action[0], action[1], 0])
    agentVel = np.array(p.getBaseVelocity(self._agent)[0])
    action = agentVel + action
    action = clampValue(action, self._vel_bounds)
    # action = np.array([1, 0, 0])
    # print ("New action: ", action)
    p.resetBaseVelocity(self._agent, linearVelocity=action, angularVelocity=[0, 0, 0])
    vel = p.getBaseVelocity(self._agent)[0]
    # print ("New vel: ", vel)
    self._hlc_timestep = self._hlc_timestep + 1
    if (self._hlc_timestep > self._hlc_skip):
        ### Sample a new random LLC target, normalized by the map area to [-1, 1].
        x = (np.random.rand() - 0.5) * self._map_area * 2.0
        y = (np.random.rand() - 0.5) * self._map_area * 2.0
        self._llc_target = np.array([x / self._map_area, y / self._map_area, 0])
        self._hlc_timestep = 0

def updateAction(self, action): import numpy as np """ action[0] == hlc action action[1] == llc action """ self._hlc_timestep = self._hlc_timestep + 1 if (self._hlc_timestep >= self._hlc_skip and (self._ran < 0.5) and (("use_MARL_HRL" in self._game_settings and (self._game_settings["use_MARL_HRL"] == True)))): # print ("Updating llc target from HLC") if ("use_hardCoded_LLC_goals" in self._game_settings and (self._game_settings["use_hardCoded_LLC_goals"] == True)): # pass if ("use_full_pose_goal" in self._game_settings and (self._game_settings["use_full_pose_goal"] == True)): self._llc_target = self.genRandomPose() else: x = (np.random.rand() - 0.5) * 2.0 y = (np.random.rand() - 0.5) * 2.0 self._llc_target = [x, y] else: if ("use_full_pose_goal" in self._game_settings and (self._game_settings["use_full_pose_goal"] == True)): self._llc_target = action[0] else: self._llc_target = clampValue([action[0][0], action[0][1]], self._vel_bounds) ### Need to store this target in the sim as a gobal location to allow for computing local distance state. pos = np.array( self._p.getBasePositionAndOrientation(self._agent)[0]) self._hlc_timestep = 0 ### Update llc action llc_obs = self.getObservation()[1] ### crazy hack to get proper state size... if ("append_centralized_state_hack" in self._game_settings and (self._game_settings["append_centralized_state_hack"] == True)): llc_obs = np.concatenate([ llc_obs, [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] ]) action[1] = self._llc.predict([llc_obs])[0] # action[1] = [0.03, -0.023] # print ("self._llc_target: ", self._llc_target) ### apply delta position change. if ("use_MARL_HRL" in self._game_settings and (self._game_settings["use_MARL_HRL"] == False)): action_ = np.array(action[0]) else: action_ = np.array(action[1]) """ if ("use_hlc_action_directly" in self._game_settings and (self._game_settings["use_hlc_action_directly"] == True)): action_ = self._llc_target """ # print ("New action: ", action_) # self._p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0]) super(NavGameAntHRL, self).updateAction(action_)
def updateAction(self, action): import numpy as np """ action[0] == hlc action action[1] == llc action """ if (self._hlc_timestep >= self._hlc_skip # and (self._ran < 0.5) and (("use_MARL_HRL" in self._game_settings and (self._game_settings["use_MARL_HRL"] == True)))): self._hlc_timestep = 0 self._update_goal = True self._hlc_timestep = self._hlc_timestep + 1 ### apply delta position change. if ("use_MARL_HRL" in self._game_settings and (self._game_settings["use_MARL_HRL"] == False)): action_ = np.array(action[0]) else: action_ = np.array(action[1]) if ("use_hlc_action_directly" in self._game_settings and (self._game_settings["use_hlc_action_directly"] == True)): action_ = action[0] # print ("New action: ", action_) self._p.resetBasePositionAndOrientation( self._target, self._llc_target, self._p.getQuaternionFromEuler([0., 0, 0])) self._p.resetBaseVelocity(self._target, [0, 0, 0], [0, 0, 0]) vel = self._p.getBaseVelocity(self._agent)[0] pos = np.array(self._p.getBasePositionAndOrientation(self._agent)[0]) pos = clampValue(pos, self._pos_bounds) self._p.resetBasePositionAndOrientation( self._agent, pos, self._p.getQuaternionFromEuler([0., 0, 0])) # action_[2] = 0 # print ("action_, vel: ", action_, vel) if ("use_direct_llc_action" in self._game_settings and (self._game_settings["use_direct_llc_action"] == True)): vel = action_ else: vel = action_ + vel vel = clampValue(vel, self._llc_vel_bounds) # print ("vel: ", vel) self._p.resetBaseVelocity(self._agent, linearVelocity=vel, angularVelocity=[0, 0, 0]) ### For any object within distance and z < 0.5 pull object for i in range(len(self._blocks)): posT = np.array( self._p.getBasePositionAndOrientation(self._blocks[i])[0]) goalDirection = posT - pos goalDistance = np.sqrt((goalDirection * goalDirection).sum(axis=0)) if (goalDistance < self._reach_goal_threshold and (pos[2] < 0.5)): self._p.resetBaseVelocity(self._blocks[i], linearVelocity=vel, angularVelocity=[0, 0, 0]) else: self._p.resetBaseVelocity(self._blocks[i], linearVelocity=[0, 0, 0], angularVelocity=[0, 0, 0]) super(CLEVROjectsHRL, self).updateAction(action_) vel = self._p.getBaseVelocity(self._agent)[0]
def getObservation(self):
    import numpy as np
    out = []
    out_hlc = []
    if ("include_egocentric_vision" in self._game_settings
            and (self._game_settings["include_egocentric_vision"] == True)):
        localMap = self.getlocalMapObservation()
        out_hlc.extend(localMap)
    data = self.getRobotPose()
    # print ("Data: ", data)
    ### linear vel
    out_hlc.extend(data)
    ### angular vel
    pos = np.array(self._p.getBasePositionAndOrientation(self._agent)[0])
    ### Relative (x, y) direction from the agent to each block and to its goal location.
    for i in range(len(self._blocks)):
        posT = np.array(self._p.getBasePositionAndOrientation(self._blocks[i])[0])
        goalDirection = posT - pos
        out_hlc.extend([goalDirection[0], goalDirection[1]])
        posT = np.array(self._p.getBasePositionAndOrientation(self._blocks_goals[i])[0])
        goalDirection = posT - pos
        out_hlc.extend([goalDirection[0], goalDirection[1]])
    if (self._hlc_timestep >= self._hlc_skip
            # and (self._ran < 0.5)
            and (("use_MARL_HRL" in self._game_settings
                  and (self._game_settings["use_MARL_HRL"] == True)))):
        # print ("Updating llc target from HLC")
        if ("use_hardCoded_LLC_goals" in self._game_settings
                and (self._game_settings["use_hardCoded_LLC_goals"] == True)):
            x = (np.random.rand() - 0.5) * 2.0
            y = (np.random.rand() - 0.5) * 2.0
            z = (np.random.rand() - 0.5)
            self._llc_target = np.array([x, y, z])
        else:
            if (self._hlp is not None):
                # out_hlc = out_hlc + [0] * 35
                goal = self._hlp.predict([out_hlc])[0]
            else:
                goal = [0, 0, 0]
            # print ("goal: ", goal)
            self._llc_target = clampValue(goal, self._llc_vel_bounds)
        self._update_goal = False
    out_llc = []
    out_llc.extend(data)
    ### Relative distance from current LLC state
    # if (self._ran < 0.5):
    #     out_llc.extend(np.array(self._llc_target) - np.array(data[0]))
    if ("use_MARL_HRL" in self._game_settings
            and (self._game_settings["use_MARL_HRL"] == True)):
        # diff = self._llc_target - pos
        diff = self._llc_target
        out_llc.extend(np.array(diff))
    else:
        ### Note: `i` here is the last index left over from the block loop above.
        posT = np.array(self._p.getBasePositionAndOrientation(self._blocks[i])[0])
        goalDirection = posT - pos
        out_llc.extend(np.array([goalDirection[0], goalDirection[1]]))
    # else:
    #     out_llc.extend(np.array(self._llc_target) - pos)
    # print ("out_llc: ", out_llc)
    if ("use_MARL_HRL" in self._game_settings
            and (self._game_settings["use_MARL_HRL"] == True)):
        out.append(np.array(out_hlc))
    out.append(np.array(out_llc))
    self._last_state = np.array(out)
    self._last_pose = np.array(self._p.getBasePositionAndOrientation(self._agent)[0])
    return self._last_state

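### Control-loop sketch (assumption, not from the original code): taken together, the
### snippets above implement a two-timescale loop in which a high-level policy picks a
### new LLC target every self._hlc_skip steps and a low-level policy acts every step
### toward that target. `env`, `hlp`, and `llc` are hypothetical stand-ins exposing the
### same predict()/getObservation()/updateAction() interface used above.
def run_hrl_episode(env, hlp, llc, steps=100, hlc_skip=10):
    hlc_action = [0.0, 0.0]
    for t in range(steps):
        state = env.getObservation()
        hlc_obs, llc_obs = state[0], state[1]
        if t % hlc_skip == 0:
            ### High level: choose a new goal/target for the low level.
            hlc_action = hlp.predict([hlc_obs])[0]
        ### Low level: act toward the current target at every step.
        llc_action = llc.predict([llc_obs])[0]
        env.updateAction([hlc_action, llc_action])
        env.update()  # hypothetical per-step simulation update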