Example #1
 def step(self, action):
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])  # opposite commands to the two finger actuators open/close the gripper
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     reward, reachDist, pullDist = self.compute_reward(action, ob)
     self.curr_path_length += 1
     #info = self._get_info()
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     info = {
         'reachDist': reachDist,
         'goalDist': pullDist,
         'epRew': reward,
         'pickRew': None,
         'success': float(pullDist <= 0.03)
     }
     info['goal'] = self._state_goal
     return ob, reward, done, info
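Example #1's `rotMode` dispatch recurs nearly verbatim in every example that follows: the leading entries of `action` become an end-effector pose command, and `action[-1]` drives the gripper. Below is a minimal standalone sketch of that mapping; the helper name `pose_command` is mine, SciPy's `Rotation` stands in for the envs' `euler2quat`, and the 'xyz' Euler convention plus the scalar-first (MuJoCo-style) quaternion order are assumptions, not confirmed by the snippets.

 import numpy as np
 from scipy.spatial.transform import Rotation  # stand-in for euler2quat

 def pose_command(action, rot_mode):
     # Mirrors the rotMode dispatch in the step() methods above; returns the
     # array that set_xyz_action* would receive. Sketch only, not the envs' code.
     if rot_mode == 'euler':
         quat_xyzw = Rotation.from_euler('xyz', action[3:6]).as_quat()
         quat_wxyz = np.roll(quat_xyzw, 1)  # assume scalar-first order downstream
         return np.concatenate([action[:3], quat_wxyz])
     if rot_mode == 'fixed':
         return action[:3]   # position only; orientation held fixed
     if rot_mode == 'rotz':
         return action[:4]   # position + rotation about z
     return action[:7]       # position + full quaternion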
Example #2
 def step(self, action):
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, placeRew, placingDist, success = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     #info = self._get_info()
     info = {
         'reachDist': reachDist,
         'pickRew': pickRew,
         'epRew': reward,
         'goalDist': placingDist,
         'success': success
     }
     info['goal'] = self.goal
     return ob, reward, False, info
Example #3
 def step(self, action):
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(
         np.concatenate((self._state_goal, [self.stick_init_pos[-1]])))
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, pushRew, pushDist = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     info = {
         'reachDist': reachDist,
         'pickRew': pickRew,
         'epRew': reward,
         'goalDist': pushDist,
         'success': float(pushDist <= 0.1 and reachDist <= 0.05)
     }
     info['goal'] = self.goal
     return ob, reward, done, info
Example #4
 def step(self, action):
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, hammerRew, hammerDist, screwDist = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     # return ob, reward, False, { 'reachRew':reachRew, 'reachDist': reachDist, 'pickRew':pickRew,
     #                             'hammerRew': hammerRew, 'epRew' : reward, 'hammerDist': hammerDist, 'screwDist': screwDist}
     info = {
         'reachDist': reachDist,
         'pickRew': pickRew,
         'epRew': reward,
         'goalDist': screwDist,
         'success': float(screwDist <= 0.05)
     }
     info['goal'] = self.goal
     done = (self.curr_path_length == self.max_path_length)
     return ob, reward, done, info
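Example #4 folds the time-limit check into a single expression: under the four-tuple Gym API these snippets use, `done` only becomes True once `curr_path_length` reaches `max_path_length`. The same truncation can be sketched with Gym's `TimeLimit` wrapper; the CartPole stand-in and the 150-step limit below are placeholders, and a pre-0.26 Gym (which still returns four values from `step()`) is assumed.

 import gym

 # Stand-in env; a SawyerXYZ env from the examples would take its place.
 env = gym.wrappers.TimeLimit(gym.make('CartPole-v1'), max_episode_steps=150)
 ob = env.reset()
 done = False
 while not done:
     ob, reward, done, info = env.step(env.action_space.sample())
 # done flips to True at the step limit, like curr_path_length == max_path_length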
Example #5
 def step(self, action):
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachDist, pressDist = self.compute_reward(action, obs_dict)
     self.curr_path_length += 1
     info = {
         'reachDist': reachDist,
         'goalDist': pressDist,
         'epRew': reward,
         'pickRew': None,
         'success': float(pressDist <= 0.04)
     }
     info['goal'] = self.goal
     return ob, reward, False, info
Example #6
 def step(self, action):
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode != 'quat':
         assert self.rotMode == 'euler'
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, placeRew, placingDist = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     #info = self._get_info()
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     return ob, reward, done, {
         'reachRew': reachRew,
         'reachDist': reachDist,
         'pickRew': pickRew,
         'placeRew': placeRew,
         'epRew': reward,
         'placingDist': placingDist
     }
Example #7
    def step(self, action):
        if self.rotMode == 'euler':
            action_ = np.zeros(7)
            action_[:3] = action[:3]
            action_[3:] = euler2quat(action[3:6])
            self.set_xyz_action_rot(action_)
        elif self.rotMode == 'fixed':
            self.set_xyz_action(action[:3])
        elif self.rotMode == 'rotz':
            self.set_xyz_action_rotz(action[:4])
        else:
            self.set_xyz_action_rot(action[:7])
        self.do_simulation([action[-1], -action[-1]])
        # The marker seems to get reset every time you do a simulation
        self._set_goal_marker(self._state_goal)
        ob = self._get_obs()
        obs_dict = self._get_obs_dict()
        reward, reachRew, reachDist, pushRew, pushDist, pickRew, placeRew, placingDist = self.compute_reward(
            action, obs_dict, mode=self.rewMode, task_type=self.task_type)
        self.curr_path_length += 1
        #info = self._get_info()
        if self.curr_path_length == self.max_path_length:
            done = True
        else:
            done = False

        goal_dist = placingDist if self.task_type == 'pick_place' else pushDist
        if self.task_type == 'reach':
            success = float(reachDist <= 0.05)
        else:
            success = float(goal_dist <= 0.07)
        info = {
            'reachDist': reachDist,
            'pickRew': pickRew,
            'epRew': reward,
            'goalDist': goal_dist,
            'success': success
        }
        info['goal'] = self._state_goal
        return ob, reward, done, info
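Example #7 is the only variant that switches both the goal metric and the success threshold on `task_type`. That selection logic, isolated as a sketch (the function name is mine; thresholds are copied from the snippet above):

 def is_success(task_type, reach_dist, push_dist, placing_dist):
     # 'reach' is judged on gripper-to-target distance; the other task types
     # are judged on object-to-goal distance with a looser threshold.
     if task_type == 'reach':
         return float(reach_dist <= 0.05)
     goal_dist = placing_dist if task_type == 'pick_place' else push_dist
     return float(goal_dist <= 0.07)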
Example #8
    def step(self, action):
        if self.rotMode == 'euler':
            action_ = np.zeros(7)
            action_[:3] = action[:3]
            action_[3:] = euler2quat(action[3:6])
            self.set_xyz_action_rot(action_)
        elif self.rotMode == 'fixed':
            self.set_xyz_action(action[:3])
        elif self.rotMode == 'rotz':
            self.set_xyz_action_rotz(action[:4])
        else:
            self.set_xyz_action_rot(action[:7])
        self.do_simulation([action[-1], -action[-1]])

        ob = self._get_obs()
        reward = self.compute_reward()
        self.curr_path_length += 1
        if self.curr_path_length == self.max_path_length:
            self._reset_hand()
            done = True
        else:
            done = False

        return ob, reward, done, {
            'pos': ob,
            'hand': self.get_endeff_pos(),
            'success': self.is_goal()
        }
Example #9
 def step(self, action):
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode == 'euler':  # end-effector xyz position control + xyz rotation via Euler angles
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'horizontal_fixed' or self.rotMode == 'vertical_fixed':
         self.set_xyz_action(action[:3])  # end-effector xyz position control
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])  # end-effector xyz control + z rotation
     else:
         self.set_xyz_action_rot(action[:7])  # end-effector xyz control + xyz rotation via quaternion? (unclear)
     # self.do_simulation([action[-1], -action[-1]])  # seems to open/close the gripper
     self.do_simulation([action[-1], action[-1]])  # seems to open/close the gripper
     
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, placeRew, placingDist = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     #info = self._get_info()
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     return ob, reward, done, {
         'reachRew': reachRew,
         'reachDist': reachDist,
         'pickRew': pickRew,
         'placeRew': placeRew,
         'epRew': reward,
         'placingDist': placingDist
     }
Example #10
    def step(self, action):
        if self.rotMode == 'euler':
            action_ = np.zeros(7)
            action_[:3] = action[:3]
            action_[3:] = euler2quat(action[3:6])
            self.set_xyz_action_rot(action_)
        elif self.rotMode == 'fixed':
            self.set_xyz_action(action[:3])
        elif self.rotMode == 'rotz':
            self.set_xyz_action_rotz(action[:4])
        else:
            self.set_xyz_action_rot(action[:7])
        self.do_simulation([action[-1], -action[-1]])
        # The marker seems to get reset every time you do a simulation
        self._set_goal_marker(self._state_goal)
        ob = self._get_obs()
        obs_dict = self._get_obs_dict()
        reward, reachRew, reachDist, pickRew, placeRew, placingDist = self.compute_reward(
            action, obs_dict, mode=self.rewMode)

        # With reward_delay > 1, recompute the reward from an observation
        # snapshot that is refreshed only once every reward_delay steps.
        if self.reward_delay > 1:
            if self.count % self.reward_delay == 0:
                self.obs_dict = self._get_obs_dict()
            obs_dict = self.obs_dict
            reward, _, _, _, _, _ = self.compute_reward(action,
                                                        obs_dict,
                                                        mode=self.rewMode)
            self.count += 1

        self.curr_path_length += 1
        #info = self._get_info()
        done = False  # this variant never terminates episodes early
        info = {
            'reachDist': reachDist,
            'pickRew': pickRew,
            'epRew': reward,
            'goalDist': placingDist,
            'success': float(placingDist <= 0.08)
        }
        info['goal'] = self._state_goal
        info['in_hand'] = info['success']
        info['distance'] = info['goalDist']
        return ob, reward, done, info
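The `reward_delay` branch in this last example recomputes the reward from a cached observation that is refreshed only every `reward_delay` steps. The cadence, isolated from the env as a sketch (`get_obs` and `compute_reward` are stand-ins for `self._get_obs_dict` and `self.compute_reward`; the class name is mine):

 class DelayedReward:
     # Sketch of the reward_delay logic above, lifted out of the env class.
     def __init__(self, delay, get_obs, compute_reward):
         self.delay = delay
         self.get_obs = get_obs
         self.compute_reward = compute_reward
         self.count = 0
         self.cached = None

     def reward(self, action):
         if self.delay <= 1:
             return self.compute_reward(action, self.get_obs())
         if self.count % self.delay == 0:
             self.cached = self.get_obs()  # refresh the snapshot
         self.count += 1
         return self.compute_reward(action, self.cached)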