def step(self, action): if self.if_render: self.render() # self.set_xyz_action_rot(action[:7]) if self.rotMode == 'euler': action_ = np.zeros(7) action_[:3] = action[:3] action_[3:] = euler2quat(action[3:6]) self.set_xyz_action_rot(action_) elif self.rotMode == 'fixed': self.set_xyz_action(action[:3]) elif self.rotMode == 'rotz': self.set_xyz_action_rotz(action[:4]) else: self.set_xyz_action_rot(action[:7]) self.do_simulation([action[-1], -action[-1]]) # The marker seems to get reset every time you do a simulation self._set_goal_marker(self._state_goal) ob = self._get_obs() obs_dict = self._get_obs_dict() reward, reachDist, pushDist = self.compute_reward(action, obs_dict) self.curr_path_length += 1 #info = self._get_info() if self.curr_path_length == self.max_path_length: done = True else: done = False return ob, reward, done, { 'reachDist': reachDist, 'goalDist': pushDist, 'epRew': reward, 'pickRew': None, 'success': float(pushDist <= 0.07) }
def step(self, action): if self.if_render: self.render() # self.set_xyz_action_rot(action[:7]) if self.rotMode == 'euler': action_ = np.zeros(7) action_[:3] = action[:3] action_[3:] = euler2quat(action[3:6]) self.set_xyz_action_rot(action_) elif self.rotMode == 'fixed': self.set_xyz_action(action[:3]) elif self.rotMode == 'rotz': self.set_xyz_action_rotz(action[:4]) else: self.set_xyz_action_rot(action[:7]) self.do_simulation([action[-1], -action[-1]]) # The marker seems to get reset every time you do a simulation ob = self._get_obs() obs_dict = self._get_obs_dict() reward, reachRew, reachDist, pickRew, hammerRew, hammerDist, screwDist = self.compute_reward( action, obs_dict, mode=self.rewMode) self.curr_path_length += 1 if self.curr_path_length == self.max_path_length: done = True else: done = False # return ob, reward, done, { 'reachRew':reachRew, 'reachDist': reachDist, 'pickRew':pickRew, # 'hammerRew': hammerRew, 'epRew' : reward, 'hammerDist': hammerDist, 'screwDist': screwDist} return ob, reward, done, { 'reachDist': reachDist, 'pickRew': pickRew, 'epRew': reward, 'goalDist': screwDist, 'success': float(screwDist <= 0.05) }
def step(self, action): # self.set_xyz_action_rot(action[:7]) if self.rotMode != 'quat': assert self.rotMode == 'euler' action_ = np.zeros(7) action_[:3] = action[:3] action_[3:] = euler2quat(action[3:6]) self.set_xyz_action_rot(action_) else: self.set_xyz_action_rot(action[:7]) self.do_simulation([action[-1], -action[-1]]) # The marker seems to get reset every time you do a simulation self._set_goal_marker(self._state_goal) ob = self._get_obs() obs_dict = self._get_obs_dict() reward , reachRew, reachDist, pickRew, placeRew , placingDist = self.compute_reward(action, obs_dict, mode = self.rewMode) self.curr_path_length +=1 #info = self._get_info() if self.curr_path_length == self.max_path_length: done = True else: done = False return ob, reward, done, { 'reachRew':reachRew, 'reachDist': reachDist, 'pickRew':pickRew, 'placeRew': placeRew, 'epRew' : reward, 'placingDist': placingDist}