def step(self, action):
    """Advance the simulation one step; return (obs, reward, done, info)."""
    mode = self.rotMode
    if mode == 'euler':
        # xyz position command + euler-angle rotation converted to quaternion.
        pose_cmd = np.zeros(7)
        pose_cmd[:3] = action[:3]
        pose_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(pose_cmd)
    elif mode == 'fixed':
        self.set_xyz_action(action[:3])
    elif mode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    # Last action component drives the two gripper fingers symmetrically.
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    reward, reachDist, pullDist = self.compute_reward(action, ob)
    self.curr_path_length += 1
    done = self.curr_path_length == self.max_path_length
    info = {
        'reachDist': reachDist,
        'goalDist': pullDist,
        'epRew': reward,
        'pickRew': None,
        'success': float(pullDist <= 0.03),
        'goal': self._state_goal,
    }
    return ob, reward, done, info
def step(self, action):
    """One control step; this env never signals termination (done is False)."""
    if self.rotMode == 'euler':
        pose_cmd = np.zeros(7)
        pose_cmd[:3] = action[:3]
        pose_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(pose_cmd)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    (reward, reachRew, reachDist, pickRew,
     placeRew, placingDist, success) = self.compute_reward(
        action, obs_dict, mode=self.rewMode)
    self.curr_path_length += 1
    info = {
        'reachDist': reachDist,
        'pickRew': pickRew,
        'epRew': reward,
        'goalDist': placingDist,
        'success': success,
        'goal': self.goal,
    }
    return ob, reward, False, info
def step(self, action):
    """Step the stick-push environment; return (obs, reward, done, info)."""
    rot_mode = self.rotMode
    if rot_mode == 'euler':
        pose_cmd = np.zeros(7)
        pose_cmd[:3] = action[:3]
        pose_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(pose_cmd)
    elif rot_mode == 'fixed':
        self.set_xyz_action(action[:3])
    elif rot_mode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation; the 2-D
    # goal is lifted to 3-D with the stick's initial height before drawing.
    marker_pos = np.concatenate((self._state_goal, [self.stick_init_pos[-1]]))
    self._set_goal_marker(marker_pos)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    (reward, reachRew, reachDist, pickRew,
     pushRew, pushDist) = self.compute_reward(action, obs_dict, mode=self.rewMode)
    self.curr_path_length += 1
    done = self.curr_path_length == self.max_path_length
    info = {
        'reachDist': reachDist,
        'pickRew': pickRew,
        'epRew': reward,
        'goalDist': pushDist,
        'success': float(pushDist <= 0.1 and reachDist <= 0.05),
        'goal': self.goal,
    }
    return ob, reward, done, info
def step(self, action):
    """Step the hammer/screw environment; return (obs, reward, done, info).

    Cleanup: removed dead commented-out code (an alternative
    `set_xyz_action_rot` call and an obsolete return statement) that
    obscured the live logic.
    """
    if self.rotMode == 'euler':
        # xyz position command + euler rotation converted to quaternion.
        action_ = np.zeros(7)
        action_[:3] = action[:3]
        action_[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(action_)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    # Symmetric gripper command. The marker seems to get reset every time
    # you do a simulation.
    self.do_simulation([action[-1], -action[-1]])
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    (reward, reachRew, reachDist, pickRew,
     hammerRew, hammerDist, screwDist) = self.compute_reward(
        action, obs_dict, mode=self.rewMode)
    self.curr_path_length += 1
    info = {
        'reachDist': reachDist,
        'pickRew': pickRew,
        'epRew': reward,
        'goalDist': screwDist,
        'success': float(screwDist <= 0.05),
        'goal': self.goal,
    }
    done = (self.curr_path_length == self.max_path_length)
    return ob, reward, done, info
def step(self, action):
    """Step the button-press environment; done is always False here."""
    if self.rotMode == 'euler':
        pose_cmd = np.zeros(7)
        pose_cmd[:3] = action[:3]
        pose_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(pose_cmd)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    reward, reachDist, pressDist = self.compute_reward(action, obs_dict)
    self.curr_path_length += 1
    info = {
        'reachDist': reachDist,
        'goalDist': pressDist,
        'epRew': reward,
        'pickRew': None,
        'success': float(pressDist <= 0.04),
        'goal': self.goal,
    }
    return ob, reward, False, info
def step(self, action):
    """Step; quaternion actions pass straight through, any other rotMode
    must be 'euler' (asserted) and is converted to a quaternion."""
    if self.rotMode == 'quat':
        self.set_xyz_action_rot(action[:7])
    else:
        assert self.rotMode == 'euler'
        quat_cmd = np.zeros(7)
        quat_cmd[:3] = action[:3]
        quat_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(quat_cmd)
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    (reward, reachRew, reachDist, pickRew,
     placeRew, placingDist) = self.compute_reward(
        action, obs_dict, mode=self.rewMode)
    self.curr_path_length += 1
    done = self.curr_path_length == self.max_path_length
    return ob, reward, done, {
        'reachRew': reachRew,
        'reachDist': reachDist,
        'pickRew': pickRew,
        'placeRew': placeRew,
        'epRew': reward,
        'placingDist': placingDist,
    }
def step(self, action):
    """Step for the reach/push/pick_place family; the success criterion
    and reported goal distance depend on self.task_type."""
    if self.rotMode == 'euler':
        pose_cmd = np.zeros(7)
        pose_cmd[:3] = action[:3]
        pose_cmd[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(pose_cmd)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    (reward, reachRew, reachDist, pushRew, pushDist, pickRew,
     placeRew, placingDist) = self.compute_reward(
        action, obs_dict, mode=self.rewMode, task_type=self.task_type)
    self.curr_path_length += 1
    done = self.curr_path_length == self.max_path_length
    goal_dist = placingDist if self.task_type == 'pick_place' else pushDist
    if self.task_type == 'reach':
        success = float(reachDist <= 0.05)
    else:
        success = float(goal_dist <= 0.07)
    info = {
        'reachDist': reachDist,
        'pickRew': pickRew,
        'epRew': reward,
        'goalDist': goal_dist,
        'success': success,
        'goal': self._state_goal,
    }
    return ob, reward, done, info
def step(self, action):
    """Step; resets the hand pose when the episode hits max_path_length.

    Cleanup: removed the dead `ob = None` assignment that was immediately
    overwritten by the `self._get_obs()` result.
    """
    if self.rotMode == 'euler':
        action_ = np.zeros(7)
        action_[:3] = action[:3]
        action_[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(action_)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    ob = self._get_obs()
    reward = self.compute_reward()
    self.curr_path_length += 1
    if self.curr_path_length == self.max_path_length:
        # End of episode: move the hand back to its start configuration.
        self._reset_hand()
        done = True
    else:
        done = False
    return ob, reward, done, {
        'pos': ob,
        'hand': self.get_endeff_pos(),
        'success': self.is_goal(),
    }
def step(self, action):
    """Step the environment; return (obs, reward, done, info).

    Bug fixed: the returned info dict referenced reachRew / reachDist /
    pickRew / placeRew / placingDist, none of which are bound in this
    method (compute_reward's result is assigned only to `reward`), so
    every call raised NameError. The keys are kept for caller
    compatibility but now report None, since this compute_reward variant
    returns only the scalar reward. Korean comments translated to English.
    """
    if self.rotMode == 'euler':
        # ee pos xyz control + xyz rotation given as euler angles.
        action_ = np.zeros(7)
        action_[:3] = action[:3]
        action_[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(action_)
    elif self.rotMode == 'horizontal_fixed' or self.rotMode == 'vertical_fixed':
        self.set_xyz_action(action[:3])  # ee pos xyz control
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])  # ee pos xyz control + z rotation
    else:
        # ee pos xyz control + xyz rotation, presumably as a quaternion
        # (original author marked this as uncertain).
        self.set_xyz_action_rot(action[:7])
    # Seems to open/close the gripper. NOTE(review): both fingers get the
    # SAME sign here, unlike the [a, -a] convention used by the sibling
    # envs — confirm this is intentional for this gripper model.
    self.do_simulation([action[-1], action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    reward = self.compute_reward(action, obs_dict, mode=self.rewMode)
    self.curr_path_length += 1
    done = self.curr_path_length == self.max_path_length
    # Detailed reward diagnostics are unavailable from this compute_reward;
    # keep the keys so downstream consumers do not KeyError.
    return ob, reward, done, {
        'reachRew': None,
        'reachDist': None,
        'pickRew': None,
        'placeRew': None,
        'epRew': reward,
        'placingDist': None,
    }
def step(self, action):
    """Step; this env never signals termination itself (done is always False).

    Cleanup: removed the dead `if curr_path_length == max_path_length`
    branch whose result was unconditionally overwritten by `done = False`
    on the very next statement; behavior is unchanged.
    """
    if self.rotMode == 'euler':
        action_ = np.zeros(7)
        action_[:3] = action[:3]
        action_[3:] = euler2quat(action[3:6])
        self.set_xyz_action_rot(action_)
    elif self.rotMode == 'fixed':
        self.set_xyz_action(action[:3])
    elif self.rotMode == 'rotz':
        self.set_xyz_action_rotz(action[:4])
    else:
        self.set_xyz_action_rot(action[:7])
    self.do_simulation([action[-1], -action[-1]])
    # The marker seems to get reset every time you do a simulation.
    self._set_goal_marker(self._state_goal)
    ob = self._get_obs()
    obs_dict = self._get_obs_dict()
    reward, reachRew, reachDist, pickRew, placeRew, placingDist = self.compute_reward(
        action, obs_dict, mode=self.rewMode)
    if self.reward_delay > 1:
        # NOTE(review): the cached obs_dict and reward are refreshed only
        # every `reward_delay` steps; on the other steps the undelayed
        # reward computed above is returned — confirm this matches the
        # intended delayed-reward semantics.
        if self.count % self.reward_delay == 0:
            self.obs_dict = self._get_obs_dict()
            obs_dict = self.obs_dict
            reward, _, _, _, _, _ = self.compute_reward(action, obs_dict,
                                                        mode=self.rewMode)
        self.count += 1
    self.curr_path_length += 1
    done = False
    info = {
        'reachDist': reachDist,
        'pickRew': pickRew,
        'epRew': reward,
        'goalDist': placingDist,
        'success': float(placingDist <= 0.08),
        'goal': self._state_goal,
    }
    info['in_hand'] = info['success']
    info['distance'] = info['goalDist']
    return ob, reward, done, info