def updateAction(self, action):
        import numpy as np
        import pybullet as p  # assumed: `p` is the PyBullet module used by the calls below
        """
            action[0] == hlc action
            action[1] == llc action
        """
        self._hlc_timestep = self._hlc_timestep + 1
        if ("dont_do_hrl_logic" in self._game_settings
            and (self._game_settings["dont_do_hrl_logic"] == True)):
            self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds)
        else:
            self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds)


        ### apply delta velocity change.
        action_ = np.array([action[1][0], action[1][1], 0])
        agentVel = np.array(p.getBaseVelocity(self._agent)[0])
        # print ("action_: ", action_, " agentVel: ", agentVel) 
        action_ = agentVel + action_
        action_ = clampValue(action_, self._vel_bounds)
        if ("use_hlc_action_directly" in self._game_settings
            and (self._game_settings["use_hlc_action_directly"] == True)):
            action_ = self._llc_target
        # print ("New action: ", action_)
        p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0])
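
The `clampValue` helper used throughout these examples is not shown here. A minimal sketch of what it presumably does (clamping each component of a vector to per-axis bounds) is given below; the bounds layout `[lower_vector, upper_vector]` is an assumption, not taken from the original source.

def clampValue(value, bounds):
    # Hypothetical helper: clamp each component of `value` between the
    # corresponding entries of bounds[0] (lower) and bounds[1] (upper).
    import numpy as np
    return np.clip(np.asarray(value, dtype=float),
                   np.asarray(bounds[0], dtype=float),
                   np.asarray(bounds[1], dtype=float))
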
Example #2
 def updateAction(self, action):
     import numpy as np
     import pybullet as p  # assumed: `p` is the PyBullet module used by the calls below
     """
         action[0] == hlc action
         action[1] == llc action
     """
     self._hlc_timestep = self._hlc_timestep + 1
     if (self._hlc_timestep >= self._hlc_skip 
         and (self._ran < 0.5)):
         # print ("Updating llc target from HLC")
         self._llc_target = clampValue([action[0][0], action[0][1], 0], self._vel_bounds)
         ### Need to store this target in the sim as a global location to allow computing the local distance state.
         pos = np.array(p.getBasePositionAndOrientation(self._agent)[0])
         # self._llc_target = self._llc_target + action_
         self._hlc_timestep = 0
         ### Update llc action
         llc_obs = self.getObservation()[1]
         ### crazy hack to get proper state size...
         # llc_obs = np.concatenate([llc_obs,[0,0,0,0,0,0]])
         action[1] = self._llc.predict([llc_obs])
         # action[1] = [0.03, -0.023]
         # print ("self._llc_target: ", self._llc_target)
     ### apply delta velocity change.
     action_ = np.array([action[1][0], action[1][1], 0])
     agentVel = np.array(p.getBaseVelocity(self._agent)[0])
     action_ = agentVel + action_
     action_ = clampValue(action_, self._vel_bounds)
     if ("use_hlc_action_directly" in self._game_settings
         and (self._game_settings["use_hlc_action_directly"] == True)):
         action_ = self._llc_target
     # print ("New action: ", action)
     p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0])
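
The pattern above updates the high-level target only every `_hlc_skip` calls and otherwise keeps integrating the low-level velocity delta. A minimal, self-contained sketch of that skip logic follows; all names and numbers here are illustrative placeholders, not values from the original environment.

import numpy as np

def hrl_step(hlc_timestep, hlc_skip, hlc_action, llc_target, vel_bounds):
    # Re-clamp the high-level target only every `hlc_skip` steps, mirroring
    # the `_hlc_timestep >= _hlc_skip` check in updateAction above.
    hlc_timestep += 1
    if hlc_timestep >= hlc_skip:
        llc_target = np.clip([hlc_action[0], hlc_action[1], 0],
                             vel_bounds[0], vel_bounds[1])
        hlc_timestep = 0
    return hlc_timestep, llc_target

# Example usage with placeholder bounds and a fixed HLC proposal.
timestep, target = 0, np.zeros(3)
bounds = ([-1.0, -1.0, 0.0], [1.0, 1.0, 0.0])
for _ in range(10):
    timestep, target = hrl_step(timestep, 4, np.array([0.5, -0.2]), target, bounds)
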
Example #3
 def updateAction(self, action):
     import numpy as np
     import pybullet as p  # assumed: `p` is the PyBullet module used by the calls below
     ### apply delta velocity change.
     action = np.array([action[0], action[1], 0])
     agentVel = np.array(p.getBaseVelocity(self._agent)[0])
     action = agentVel + action
     action = clampValue(action, self._vel_bounds)
     # action = np.array([1, 0, 0])
     # print ("New action: ", action)
     p.resetBaseVelocity(self._agent, linearVelocity=action, angularVelocity=[0,0,0])
     vel = p.getBaseVelocity(self._agent)[0]
     # print ("New vel: ", vel)
     self._hlc_timestep = self._hlc_timestep + 1
     if (self._hlc_timestep > self._hlc_skip):
         x = (np.random.rand()-0.5) * self._map_area * 2.0
         y = (np.random.rand()-0.5) * self._map_area * 2.0
         
         self._llc_target = np.array([x/self._map_area, y/self._map_area, 0])
         self._hlc_timestep = 0
Example #4
 def updateAction(self, action):
     import numpy as np
     """
         action[0] == hlc action
         action[1] == llc action
     """
     self._hlc_timestep = self._hlc_timestep + 1
     if (self._hlc_timestep >= self._hlc_skip and (self._ran < 0.5)
             and (("use_MARL_HRL" in self._game_settings and
                   (self._game_settings["use_MARL_HRL"] == True)))):
         # print ("Updating llc target from HLC")
         if ("use_hardCoded_LLC_goals" in self._game_settings and
             (self._game_settings["use_hardCoded_LLC_goals"] == True)):
             # pass
             if ("use_full_pose_goal" in self._game_settings and
                 (self._game_settings["use_full_pose_goal"] == True)):
                 self._llc_target = self.genRandomPose()
             else:
                 x = (np.random.rand() - 0.5) * 2.0
                 y = (np.random.rand() - 0.5) * 2.0
                 self._llc_target = [x, y]
         else:
             if ("use_full_pose_goal" in self._game_settings and
                 (self._game_settings["use_full_pose_goal"] == True)):
                 self._llc_target = action[0]
             else:
                 self._llc_target = clampValue([action[0][0], action[0][1]],
                                               self._vel_bounds)
         ### Need to store this target in the sim as a global location to allow computing the local distance state.
         pos = np.array(
             self._p.getBasePositionAndOrientation(self._agent)[0])
         self._hlc_timestep = 0
         ### Update llc action
         llc_obs = self.getObservation()[1]
         ### crazy hack to get proper state size...
         if ("append_centralized_state_hack" in self._game_settings
                 and (self._game_settings["append_centralized_state_hack"]
                      == True)):
             # Pad with 31 zeros of centralized-state placeholder.
             llc_obs = np.concatenate([llc_obs, [0] * 31])
         action[1] = self._llc.predict([llc_obs])[0]
         # action[1] = [0.03, -0.023]
         # print ("self._llc_target: ", self._llc_target)
     ### apply delta position change.
     if ("use_MARL_HRL" in self._game_settings
             and (self._game_settings["use_MARL_HRL"] == False)):
         action_ = np.array(action[0])
     else:
         action_ = np.array(action[1])
     """
     if ("use_hlc_action_directly" in self._game_settings
         and (self._game_settings["use_hlc_action_directly"] == True)):
         action_ = self._llc_target
     """
     # print ("New action: ", action_)
     # self._p.resetBaseVelocity(self._agent, linearVelocity=action_, angularVelocity=[0,0,0])
     super(NavGameAntHRL, self).updateAction(action_)
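
In this variant the `use_MARL_HRL` flag decides which half of the hierarchical action is forwarded to the parent class. The same routing, restated as a small standalone helper for clarity (a sketch only, with the settings dictionary passed explicitly rather than read from `self._game_settings`):

import numpy as np

def select_base_action(action, game_settings):
    # With MARL-HRL disabled the HLC action drives the agent directly;
    # otherwise the LLC action is used, as in updateAction above.
    if ("use_MARL_HRL" in game_settings
            and game_settings["use_MARL_HRL"] == False):
        return np.array(action[0])
    return np.array(action[1])

# Example usage with placeholder hierarchical actions.
print(select_base_action([[0.5, -0.2], [0.01, 0.02]], {"use_MARL_HRL": True}))
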
Example #5
    def updateAction(self, action):
        import numpy as np
        """
            action[0] == hlc action
            action[1] == llc action
        """
        if (self._hlc_timestep >= self._hlc_skip
                # and (self._ran < 0.5)
                and (("use_MARL_HRL" in self._game_settings and
                      (self._game_settings["use_MARL_HRL"] == True)))):
            self._hlc_timestep = 0
            self._update_goal = True
        self._hlc_timestep = self._hlc_timestep + 1
        ### apply delta velocity change.
        if ("use_MARL_HRL" in self._game_settings
                and (self._game_settings["use_MARL_HRL"] == False)):
            action_ = np.array(action[0])
        else:
            action_ = np.array(action[1])

        if ("use_hlc_action_directly" in self._game_settings
                and (self._game_settings["use_hlc_action_directly"] == True)):
            action_ = action[0]

        # print ("New action: ", action_)
        self._p.resetBasePositionAndOrientation(
            self._target, self._llc_target,
            self._p.getQuaternionFromEuler([0., 0, 0]))
        self._p.resetBaseVelocity(self._target, [0, 0, 0], [0, 0, 0])

        vel = self._p.getBaseVelocity(self._agent)[0]
        pos = np.array(self._p.getBasePositionAndOrientation(self._agent)[0])
        pos = clampValue(pos, self._pos_bounds)
        self._p.resetBasePositionAndOrientation(
            self._agent, pos, self._p.getQuaternionFromEuler([0., 0, 0]))
        # action_[2] = 0
        # print ("action_, vel: ", action_, vel)
        if ("use_direct_llc_action" in self._game_settings
                and (self._game_settings["use_direct_llc_action"] == True)):
            vel = action_
        else:
            vel = action_ + vel
        vel = clampValue(vel, self._llc_vel_bounds)
        # print ("vel: ", vel)
        self._p.resetBaseVelocity(self._agent,
                                  linearVelocity=vel,
                                  angularVelocity=[0, 0, 0])
        ### Pull any block that is within reach while the agent is low (agent z < 0.5)
        for i in range(len(self._blocks)):
            posT = np.array(
                self._p.getBasePositionAndOrientation(self._blocks[i])[0])
            goalDirection = posT - pos
            goalDistance = np.sqrt((goalDirection * goalDirection).sum(axis=0))
            if (goalDistance < self._reach_goal_threshold and (pos[2] < 0.5)):
                self._p.resetBaseVelocity(self._blocks[i],
                                          linearVelocity=vel,
                                          angularVelocity=[0, 0, 0])
            else:
                self._p.resetBaseVelocity(self._blocks[i],
                                          linearVelocity=[0, 0, 0],
                                          angularVelocity=[0, 0, 0])
        super(CLEVROjectsHRL, self).updateAction(action_)
        vel = self._p.getBaseVelocity(self._agent)[0]
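
The block interaction in this example reduces to a distance test: a block inherits the agent's velocity only while the agent is close and low. A self-contained sketch of that check (names are illustrative; the logic mirrors the loop above):

import numpy as np

def should_pull_block(agent_pos, block_pos, reach_goal_threshold):
    # A block is pulled along only when it is within the reach threshold
    # and the agent is low enough (agent z < 0.5), as in the loop above.
    direction = np.asarray(block_pos) - np.asarray(agent_pos)
    distance = np.sqrt((direction * direction).sum(axis=0))
    return bool(distance < reach_goal_threshold and agent_pos[2] < 0.5)

# Example usage with placeholder positions and threshold.
print(should_pull_block([0.0, 0.0, 0.2], [0.3, 0.1, 0.0], 0.5))
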
Example #6
    def getObservation(self):
        import numpy as np
        out = []
        out_hlc = []
        if ("include_egocentric_vision" in self._game_settings and
            (self._game_settings["include_egocentric_vision"] == True)):
            localMap = self.getlocalMapObservation()
            out_hlc.extend(localMap)
        data = self.getRobotPose()
        # print ("Data: ", data)
        ### linear vel
        out_hlc.extend(data)
        ### angular vel
        pos = np.array(self._p.getBasePositionAndOrientation(self._agent)[0])
        for i in range(len(self._blocks)):
            posT = np.array(
                self._p.getBasePositionAndOrientation(self._blocks[i])[0])
            goalDirection = posT - pos
            out_hlc.extend([goalDirection[0], goalDirection[1]])

            posT = np.array(
                self._p.getBasePositionAndOrientation(
                    self._blocks_goals[i])[0])
            goalDirection = posT - pos
            out_hlc.extend([goalDirection[0], goalDirection[1]])

        if (self._hlc_timestep >= self._hlc_skip
                # and (self._ran < 0.5)
                and (("use_MARL_HRL" in self._game_settings and
                      (self._game_settings["use_MARL_HRL"] == True)))):
            # print ("Updating llc target from HLC")
            if ("use_hardCoded_LLC_goals" in self._game_settings and
                (self._game_settings["use_hardCoded_LLC_goals"] == True)):
                x = (np.random.rand() - 0.5) * 2.0
                y = (np.random.rand() - 0.5) * 2.0
                z = (np.random.rand() - 0.5)
                self._llc_target = np.array([x, y, z])
            else:
                if (self._hlp is not None):
                    # out_hlc = out_hlc + [0] * 35

                    goal = self._hlp.predict([out_hlc])[0]
                else:
                    goal = [0, 0, 0]
                # print ("goal: ", goal)
                self._llc_target = clampValue(goal, self._llc_vel_bounds)
            self._update_goal = False
        out_llc = []
        out_llc.extend(data)
        ### Relative distance from current LLC state
        # if (self._ran < 0.5):
        # out_llc.extend(np.array(self._llc_target) - np.array(data[0]))

        if ("use_MARL_HRL" in self._game_settings
                and (self._game_settings["use_MARL_HRL"] == True)):
            # diff = self._llc_target - pos
            diff = self._llc_target
            out_llc.extend(np.array(diff))
        else:
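            # NOTE: `i` here carries over from the block loop above, so this
            # branch only appends the direction to the last block in the list.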
            posT = np.array(
                self._p.getBasePositionAndOrientation(self._blocks[i])[0])
            goalDirection = posT - pos
            out_llc.extend(np.array([goalDirection[0], goalDirection[1]]))
        # else:
        #     out_llc.extend(np.array(self._llc_target) - pos)
        # print ("out_llc: ", out_llc)
        if ("use_MARL_HRL" in self._game_settings
                and (self._game_settings["use_MARL_HRL"] == True)):
            out.append(np.array(out_hlc))
        out.append(np.array(out_llc))
        self._last_state = np.array(out)
        self._last_pose = np.array(
            self._p.getBasePositionAndOrientation(self._agent)[0])
        return self._last_state
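
The structure of the returned state depends on the "use_MARL_HRL" flag: when enabled the method returns both observation vectors, `[out_hlc, out_llc]`, otherwise only `[out_llc]`. A minimal illustration of how a caller picks out the LLC observation, matching the `getObservation()[1]` usage in the earlier examples (`env` here is a hypothetical environment instance):

llc_obs = env.getObservation()[1]  # LLC observation; index 0 is the HLC observation when MARL-HRL is on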