class RLBenchEnv(): """ make RLBench env to have same interfaces as openai.gym """ def __init__(self, task_name, state_type_list=['left_shoulder_rgb']): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment( action_mode, obs_config=obs_config, headless=False) self.env.launch() try: self.task = self.env.get_task(getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() if len(state_type_list)>0: self.observation_space=[] for state_type in state_type_list: state=getattr(obs, state_type) self.observation_space.append(spaces.Box(low=-np.inf, high=np.inf, shape=state.shape)) else: raise ValueError('No State Type!') self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32) self.state_type_list = state_type_list self.spec = Spec(task_name) def seed(self, seed_value): # set seed as in openai.gym env pass def render(self): # render the scene pass def _get_state(self, obs): if len(self.state_type_list)>0: state=[] for state_type in self.state_type_list: if state_type in image_types: image = getattr(obs, state_type) image=np.moveaxis(image, 2, 0) # change (H, W, C) to (C, H, W) for torch state.append(image) else: state.append(getattr(obs, state_type)) else: raise ValueError('State Type Not Exists!') return state def reset(self): descriptions, obs = self.task.reset() return self._get_state(obs) def step(self, action): obs_, reward, terminate = self.task.step(action) # reward in original rlbench is binary for success or not return self._get_state(obs_), reward, terminate, None def close(self): self.env.shutdown()
class SimulationEnvionment():
    """
    This can be a parent class from which we can have multiple child classes
    that diversify across different tasks and implement deeper functionality
    within those tasks.
    """

    def __init__(self,
                 action_mode=DEFAULT_ACTION_MODE,
                 task=DEFAULT_TASK,
                 dataset_root='',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        self.env = Environment(action_mode, dataset_root,
                               obs_config=obs_config, headless=headless)
        # No need to call launch(): get_task() launches the env.
        self.task = self.env.get_task(task)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
        self.logger = logger.create_logger(__class__.__name__)
        self.logger.propagate = 0

    def _get_state(self, obs: Observation):
        """
        This is a hook over the environment to get the state.
        It ensures that if changes need to be made to attributes of
        `Observation`, they can be made in a modular way.
        """
        raise NotImplementedError()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary (success or not)
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate

    def shutdown(self):
        self.logger.info(
            "Environment Shutdown! Create a new instance if you want to start again.")
        self.logger.handlers.pop()
        self.env.shutdown()

    def get_demos(self, num_demos):
        """
        Should be implemented by child classes, since the extra properties
        needed before training can be accessed through them.
        """
        raise NotImplementedError()
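# Sketch of a possible child class (an illustration, not from the original
# code): picks joint positions as the state and defers demo collection to
# RLBench's live demos. Assumes ReachTarget is imported.
class ReachTargetSimEnv(SimulationEnvionment):
    def __init__(self, **kwargs):
        super().__init__(task=ReachTarget, **kwargs)

    def _get_state(self, obs: Observation):
        # a deliberately simple state: proprioception only
        return obs.joint_positions

    def get_demos(self, num_demos):
        return self.task.get_demos(num_demos, live_demos=True)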
def act(graph, model, sess):
    obs_config = ObservationConfig()
    # obs_config.set_all(True)
    # These settings disable the cameras we don't need.
    obs_config.right_shoulder_camera.set_all(False)
    obs_config.wrist_camera.set_all(False)
    obs_config.left_shoulder_camera.set_all(False)

    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(action_mode, obs_config=obs_config, headless=False)
    env.launch()
    task = env.get_task(ReachTarget)

    training_steps = 150
    episode_length = 60
    obs = None
    gripper = [1.0]  # keep the gripper open
    for i in range(training_steps):
        # Reset the task once per episode; this also provides the initial obs.
        if i % episode_length == 0:
            print('Reset Episode')
            descriptions, obs = task.reset()
            print(descriptions)
        # Network inputs
        vision = obs.front_rgb
        vision = vision[np.newaxis, :]
        state = obs.joint_positions
        state = state[np.newaxis, :]
        action = None
        feed_dict = {
            model.vision: vision,
            model.state: state,
            model.action: action
        }
        run_ops = [model.predict_out]
        with graph.as_default():
            res = sess.run(run_ops, feed_dict=feed_dict)
        # print(res[0][0].shape)
        # print(gripper.shape)
        # input('check shape')
        action = np.concatenate([res[0][0], gripper], axis=-1)
        obs, reward, terminate = task.step(action)
        if reward == 1:
            print("successful try!")
    print('Done')
    env.shutdown()
def simulation():
    # See rlbench/action_modes.py for other action modes
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(action_mode_v, '', ObservationConfig(), False)
    task = env.get_task(EmptyContainer)
    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()

    allPoses = obj_pose_sensor.get_poses()
    all_object_poses = [allPoses["Shape0"], allPoses["Shape1"], allPoses["Shape4"]]
    # Sort objects by distance to the large container.
    all_object_poses.sort(
        key=lambda x: np.linalg.norm(x[:3] - allPoses["large_container"][:3]))
    agent = RandomAgent(allPoses["large_container"],
                        allPoses["small_container0"], env)
    print(descriptions)

    obj_poses = obj_pose_sensor.get_poses()
    training_steps = 10000
    episode_length = 1
    i = 0
    while i < training_steps:
        # Getting noisy object poses
        obj_poses = obj_pose_sensor.get_poses()
        mask = env._scene._cam_wrist_mask.capture_rgb()
        depth = (obs.wrist_depth, obs.wrist_rgb, mask)
        if agent.isResetting:
            # if agent._graspState == 1:
            #     q.put(depth)
            nextState = agent.resetAct(obs, obj_poses)
            obs, reward, terminate = task.step(nextState)
            if not agent.isResetting:
                task._action_mode = action_mode_v
                env._action_mode = action_mode_v
                env._set_arm_control_action()
        else:
            if i > 0 and i % episode_length == 0:
                print(i, 'Reset Episode')
                agent.isResetting = True
                task._action_mode = action_mode
                env._action_mode = action_mode
                env._set_arm_control_action()
                agent.updateV()
                continue
            oldState, action = agent.act(obs)
            obs, reward, terminate = task.step(action)
            print(reward)
            nextState = np.zeros(8)
            nextState[:7] = obs.joint_positions
            nextState[7] = obs.gripper_open > 0.5
            agent.updateQ(oldState, action, nextState, reward)
            i += 1
    env.shutdown()
def _update_info_dict(self):
    # update info dict
    self._info["action mode"] = self._action_mode
    self._info["observation mode"] = self._observation_mode
    # TODO: action dim should relate to the robot, not the action mode;
    #       here we fix it temporarily
    self._info["action dim"] = (self._action_config.action_size,)
    self._info["action low"] = -np.ones(self._info["action dim"], dtype=np.float32)
    self._info["action high"] = np.ones(self._info["action dim"], dtype=np.float32)
    if self._observation_mode == "state" or self._observation_mode == "all":
        # TODO: observation should be determined without initialising the
        #       entire environment
        with suppress_stdout():
            env = RLEnvironment(action_mode=self._action_config,
                                obs_config=self._observation_config,
                                headless=True,
                                robot_configuration=self._robot_name)
            env.launch()
            task = env.get_task(get_named_class(self._task_name, tasks))
            _, obs = task.reset()
            env.shutdown()
        del task
        del env
        self._info["time step"] = _DT
        self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
        self._info["state low"] = np.ones(self._info["state dim"],
                                          dtype=np.float32) * -np.inf
        self._info["state high"] = np.ones(self._info["state dim"],
                                           dtype=np.float32) * np.inf
    if self._observation_mode == "vision" or self._observation_mode == "all":
        self._info["left shoulder rgb dim"] = tuple(
            self._observation_config.left_shoulder_camera.image_size) + (3,)
        self._info["left shoulder rgb low"] = np.zeros(
            self._info["left shoulder rgb dim"], dtype=np.float32)
        self._info["left shoulder rgb high"] = np.ones(
            self._info["left shoulder rgb dim"], dtype=np.float32)
        self._info["right shoulder rgb dim"] = tuple(
            self._observation_config.right_shoulder_camera.image_size) + (3,)
        self._info["right shoulder rgb low"] = np.zeros(
            self._info["right shoulder rgb dim"], dtype=np.float32)
        self._info["right shoulder rgb high"] = np.ones(
            self._info["right shoulder rgb dim"], dtype=np.float32)
        self._info["wrist rgb dim"] = tuple(
            self._observation_config.wrist_camera.image_size) + (3,)
        self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                               dtype=np.float32)
        self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                               dtype=np.float32)
    self._info["reward low"] = -np.inf
    self._info["reward high"] = np.inf
def simulation(q):
    # See rlbench/action_modes.py for other action modes
    withLearning = False
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    if withLearning:
        env = Environment(action_mode_v, '', ObservationConfig(), False)
    else:
        env = Environment(action_mode, '', ObservationConfig(), False)
    task = env.get_task(EmptyContainer)
    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()
    allPoses = obj_pose_sensor.get_poses()
    agent = RandomAgent(allPoses["large_container"],
                        allPoses["small_container0"], env)
    print(descriptions)

    obj_poses = obj_pose_sensor.get_poses()
    training_steps = 10000
    episode_length = 100
    i = 0
    rewardFunc = Reward(env, ['Shape0', 'Shape2', 'Shape4'], "small_container0")
    while i < training_steps:
        # Getting noisy object poses
        obj_poses = obj_pose_sensor.get_poses()
        mask = env._scene._cam_wrist_mask.capture_rgb()
        data = (obs.wrist_depth, obs.wrist_rgb, mask)
        if withLearning:
            if i > 0 and i % episode_length == 0:
                print(i, 'Reset Episode')
                agent.updateV()
                i += 1
                continue
            oldState, action = agent.act(obs)
            obs, _, terminate = task.step(action)
            reward = rewardFunc.evaluate(obs)
            print(reward)
            nextState = np.zeros(8)
            nextState[:7] = obs.joint_positions
            nextState[7] = obs.gripper_open > 0.5
            agent.updateQ(oldState, action, nextState, reward)
            i += 1
        else:
            q.put(data)
            nextState = agent.resetAct(obs, obj_poses)
            obs, reward, terminate = task.step(nextState)
            # if terminate:
            #     break
    env.shutdown()
class RLBenchEnv(): """ make RLBench env to have same interfaces as openai.gym """ def __init__(self, task_name, state_type='left_shoulder_rgb'): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=False) self.env.launch() try: self.task = self.env.get_task( getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() state = getattr(obs, state_type) self.state_type = state_type self.spec = Spec(task_name) self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, ), dtype=np.float32) self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=state.shape) def seed(self, seed_value): # set seed as in openai.gym env pass def render(self): # render the scene pass def reset(self): descriptions, obs = self.task.reset() return getattr(obs, self.state_type) def step(self, action): obs_, reward, terminate = self.task.step(action) return getattr(obs_, self.state_type), reward, terminate, None def close(self): self.env.shutdown()
def job_worker_validation(worker_id, action_mode, obs_config, task_class,
                          command_q: mp.Queue, result_q: mp.Queue,
                          obs_scaling, redundancy_resolution_setup, seed):
    np.random.seed(worker_id + seed)
    # setup the environment
    env = Environment(action_mode=action_mode, obs_config=obs_config, headless=True)
    env.launch()
    task = env.get_task(task_class)
    task.reset()
    while True:
        command = command_q.get()
        command_type = command[0]
        command_args = command[1]
        if command_type == "reset":
            task.set_variation(task.sample_variation())
            descriptions, observation = task.reset()
            if obs_scaling is not None:
                observation = observation.get_low_dim_data() / obs_scaling
            else:
                observation = observation.get_low_dim_data()
            result_q.put((descriptions, observation))
        elif command_type == "step":
            actions = command_args[0]
            # check if we need to resolve redundancy
            if redundancy_resolution_setup["use_redundancy_resolution"]:
                actions[0:7], L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)
            else:
                # here we only care about the loss (for logging)
                _, L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7], setup=redundancy_resolution_setup)
            next_observation, reward, done = task.step(actions)
            if obs_scaling is not None:
                next_observation = next_observation.get_low_dim_data() / obs_scaling
            else:
                next_observation = next_observation.get_low_dim_data()
            result_q.put((next_observation, reward, done, L))
        elif command_type == "kill":
            print("Killing worker %d" % worker_id)
            env.shutdown()
            break
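# Sketch of how a parent process might drive the worker above (an
# illustration, not from the original code). The command protocol
# ("reset"/"step"/"kill") mirrors the worker loop; action_mode, obs_config,
# task_class and the action size are assumed to be defined elsewhere.
import multiprocessing as mp
import numpy as np

command_q, result_q = mp.Queue(), mp.Queue()
worker = mp.Process(
    target=job_worker_validation,
    args=(0, action_mode, obs_config, task_class, command_q, result_q,
          None, {"use_redundancy_resolution": False}, 42))
worker.start()

command_q.put(("reset", None))
descriptions, observation = result_q.get()
command_q.put(("step", (np.zeros(8),)))  # 7 joints + gripper, size assumed
next_observation, reward, done, L = result_q.get()
command_q.put(("kill", None))
worker.join()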
class RLBench(BaseSimulator):
    def __init__(self, h):
        # 64x64 camera outputs
        cam = CameraConfig(image_size=(64, 64))
        obs_config = ObservationConfig(left_shoulder_camera=cam,
                                       right_shoulder_camera=cam,
                                       wrist_camera=cam,
                                       front_camera=cam)
        obs_config.set_all(True)
        # delta EE control with motion planning
        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)
        # Inits
        self.env = Environment(action_mode, obs_config=obs_config, headless=h)
        self.env.launch()
        self.task = self.env.get_task(ReachTarget)

    def reset(self):
        d, o = self.task.reset()
        return o

    def step(self, a, prev_state):
        # delta orientation
        d_quat = np.array([0, 0, 0, 1])
        # gripper state
        gripper_open = 1.0
        # delta position
        d_pos = np.zeros(3)
        if (a % 2 == 0):
            # For positive magnitude
            a = int(a / 2)
            d_pos[a] = 0.03
        else:
            # For negative magnitude
            a = int((a - 1) / 2)
            d_pos[a] = -0.03
        # Forming action as expected by the environment
        action = np.concatenate([d_pos, d_quat, [gripper_open]])
        try:
            s, r, t = self.task.step(action)
            r *= 1000
        # Handling failure in planning
        except ConfigurationPathError:
            s = prev_state
            r = -0.1
            t = False
        # Handling wrong action for inverse Jacobian
        except InvalidActionError:
            s = prev_state
            r = -0.01
            t = False
        # Get bounding box centroid
        x, y, z = self.task._scene._workspace.get_position()
        # Set bounding box limits
        minx = x - 0.25
        maxx = x + 0.25
        miny = y - 0.35
        maxy = y + 0.35
        minz = z
        maxz = z + 0.5
        bounding_box = [minx, maxx, miny, maxy, minz, maxz]
        # Get gripper position
        gripper_pose = s.gripper_pose
        # Reward for being in the bounding box
        if (self.bb_check(bounding_box, gripper_pose)):
            r += 0.1
        return s, r, t

    # Check whether the gripper is in the bounding box
    def bb_check(self, bounding_box, gripper_pose):
        out = True
        if (gripper_pose[0] < bounding_box[0] or gripper_pose[0] > bounding_box[1]):
            out = False
        if (gripper_pose[1] < bounding_box[2] or gripper_pose[1] > bounding_box[3]):
            out = False
        if (gripper_pose[2] < bounding_box[4] or gripper_pose[2] > bounding_box[5]):
            out = False
        return out

    @staticmethod
    def n_actions():
        return 6

    def shutdown(self):
        print("Shutdown")
        self.env.shutdown()

    def __del__(self):
        print("Shutdown")
        self.env.shutdown()
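# Sketch of the discrete action encoding used by step() above (illustration
# only): index a in {0..5} maps to a +/-0.03 m translation along axis a // 2
# (x, y, z), which is why n_actions() returns 6.
env = RLBench(h=True)
prev = env.reset()
for a in range(RLBench.n_actions()):
    axis, sign = a // 2, (1 if a % 2 == 0 else -1)
    print("action %d -> axis %d, delta %+.2f" % (a, axis, sign * 0.03))
    prev, reward, done = env.step(a, prev)
env.shutdown()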
class SimulationEnvironment():
    """
    This can be a parent class from which we can have multiple child classes
    that diversify across different tasks and implement deeper functionality
    within those tasks.
    """

    def __init__(self,
                 task_name,
                 state_type_list=['left_shoulder_rgb'],
                 dataset_root='',
                 observation_mode='state',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self._observation_mode = observation_mode
        self.env = Environment(action_mode, dataset_root,
                               obs_config=obs_config, headless=headless)
        # get_task() can launch the env, but we launch explicitly here.
        self.env.launch()
        self.task = self.env.get_task(task_name)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
        # self.logger = logger.create_logger(__class__.__name__)
        # self.logger.propagate = 0
        if len(state_type_list) > 0:
            self.observation_space = []
            # for state_type in state_type_list:
            #     state = getattr(obs, state_type)
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        else:
            raise ValueError('No State Type!')
        self.state_type_list = state_type_list

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            if self._observation_mode == 'state':
                return self.get_low_dim_data(obs)
            elif self._observation_mode == 'vision':
                return None

    def get_low_dim_data(self, obs) -> np.ndarray:
        """Gets a 1D array of all the low-dimensional observations.

        :return: 1D array of observations.
        """
        low_dim_data = [] if obs.gripper_open is None else [[obs.gripper_open]]
        for data in [obs.joint_velocities, obs.joint_positions,
                     obs.joint_forces, obs.gripper_pose,
                     obs.gripper_joint_positions, obs.gripper_touch_forces,
                     obs.task_low_dim_state]:
            if data is not None:
                low_dim_data.append(data)
        return np.concatenate(low_dim_data)

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary (success or not)
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate, None

    def shutdown(self):
        # self.logger.info("Environment Shutdown! Create a new instance if you want to start again.")
        # self.logger.handlers.pop()
        self.env.shutdown()
def run(i, lock, task_index, variation_count, results, file_lock, tasks):
    """Each thread will choose one task and variation, and then gather
    all the episodes_per_task for that variation."""

    # Initialise each thread with a random seed
    np.random.seed(None)
    num_tasks = len(tasks)

    img_size = list(map(int, FLAGS.image_size))
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    obs_config.right_shoulder_camera.image_size = img_size
    obs_config.left_shoulder_camera.image_size = img_size
    obs_config.wrist_camera.image_size = img_size
    obs_config.front_camera.image_size = img_size

    # We want to save the masks as rgb encodings.
    obs_config.left_shoulder_camera.masks_as_one_channel = False
    obs_config.right_shoulder_camera.masks_as_one_channel = False
    obs_config.wrist_camera.masks_as_one_channel = False
    obs_config.front_camera.masks_as_one_channel = False

    if FLAGS.renderer == 'opengl':
        obs_config.right_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.left_shoulder_camera.render_mode = RenderMode.OPENGL
        obs_config.wrist_camera.render_mode = RenderMode.OPENGL
        obs_config.front_camera.render_mode = RenderMode.OPENGL

    rlbench_env = Environment(action_mode=ActionMode(),
                              obs_config=obs_config,
                              headless=True)
    rlbench_env.launch()

    task_env = None
    tasks_with_problems = results[i] = ''

    while True:
        # Figure out what task/variation this thread is going to do
        with lock:
            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break
            my_variation_count = variation_count.value
            t = tasks[task_index.value]
            task_env = rlbench_env.get_task(t)
            var_target = task_env.variation_count()
            if FLAGS.variations >= 0:
                var_target = np.minimum(FLAGS.variations, var_target)
            if my_variation_count >= var_target:
                # If we have reached the required number of variations for
                # this task, then move on to the next task.
                variation_count.value = my_variation_count = 0
                task_index.value += 1
            variation_count.value += 1
            if task_index.value >= num_tasks:
                print('Process', i, 'finished')
                break
            t = tasks[task_index.value]

        print('Process', i, 'collecting task:', task_env.get_name(),
              '// variation:', my_variation_count)

        task_env = rlbench_env.get_task(t)
        task_env.set_variation(my_variation_count)
        descriptions, obs = task_env.reset()

        variation_path = os.path.join(FLAGS.save_path, task_env.get_name(),
                                      VARIATIONS_FOLDER % my_variation_count)
        check_and_make(variation_path)

        with open(os.path.join(variation_path, VARIATION_DESCRIPTIONS), 'wb') as f:
            pickle.dump(descriptions, f)

        episodes_path = os.path.join(variation_path, EPISODES_FOLDER)
        check_and_make(episodes_path)

        abort_variation = False
        for ex_idx in range(FLAGS.episodes_per_task):
            attempts = 10
            while attempts > 0:
                try:
                    # TODO: for now we do the explicit looping.
                    demo, = task_env.get_demos(amount=1, live_demos=True)
                except Exception as e:
                    attempts -= 1
                    if attempts > 0:
                        continue
                    problem = (
                        'Process %d failed collecting task %s (variation: %d, '
                        'example: %d). Skipping this task/variation.\n%s\n' % (
                            i, task_env.get_name(), my_variation_count,
                            ex_idx, str(e)))
                    print(problem)
                    tasks_with_problems += problem
                    abort_variation = True
                    break
                episode_path = os.path.join(episodes_path, EPISODE_FOLDER % ex_idx)
                with file_lock:
                    save_demo(demo, episode_path)
                break
            if abort_variation:
                break

    results[i] = tasks_with_problems
    rlbench_env.shutdown()
class RLBenchDataEnvGroup(DataEnvGroup):
    '''
    DataEnvGroup for the RLBench environment.
    + The observation space can be modified through `global_config.env_args`
    + Observation space:
        - 'state': proprioceptive features : [37] + task_specific
            > robot joint - velocities, positions, forces
            > gripper - pose, joint-position, touch_forces
            > task_low_dim_state
        - RGB-D/RGB image : (128 x 128 by default)
            > 'left_shoulder_rgb', 'right_shoulder_rgb'
            > 'front_rgb', 'wrist_rgb'
            - NOTE : It is better to use only one of the RGB observations
              rather than all of them; this saves a lot of time during env
              creation.
            - Refer: https://github.com/stepjam/RLBench/blob/20988254b773aae433146fff3624d8bcb9ed7330/rlbench/observation_config.py
    + The action space by default is joint velocities [7] plus gripper
      actuation [1].
        - Dimension : [8]
    '''

    def __init__(self, get_episode_type=None):
        super(RLBenchDataEnvGroup, self).__init__(get_episode_type)
        assert self.env_name == 'RLBENCH'
        self.env_obj = None
        self.use_gym = self.config.env_args['use_gym']
        self.observation_mode = self.config.env_args['observation_mode'].lower()

        self.left_obv_key = 'left_shoulder_rgb'
        self.right_obv_key = 'right_shoulder_rgb'
        self.wrist_obv_key = 'wrist_rgb'
        self.front_obv_key = 'front_rgb'

        if self.observation_mode not in ['vision', 'state'] and not self.use_gym:
            self.config.env_args['vis_obv_key'] = self.observation_mode
        self.vis_obv_key = self.config.env_args['vis_obv_key']
        self.dof_obv_key = 'state'
        self.env_action_key = 'joint_velocities'
        self.env_gripper_key = 'gripper_open'

        self.obs_space = {
            self.dof_obv_key: (37),
            self.left_obv_key: (128, 128, 3),
            self.right_obv_key: (128, 128, 3),
            self.wrist_obv_key: (128, 128, 3),
            self.front_obv_key: (128, 128, 3)
        }
        self.action_space, self.gripper_space = (7), (1)
        if self.config.env_args['combine_action_space']:
            self.action_space += self.gripper_space

    def get_env(self, task=None):
        task = task if task else self.config.env_type
        if self.use_gym:
            # NOTE : When using gym, the task has to be represented as a string.
            assert type(task) == str
            assert self.observation_mode in ['vision', 'state']
            env = gym.make(task,
                           observation_mode=self.config.env_args['observation_mode'],
                           render_mode=self.config.env_args['render_mode'])
            self.env_obj = env
        else:
            obs_config = ObservationConfig()
            if self.observation_mode == 'vision':
                obs_config.set_all(True)
            elif self.observation_mode == 'state':
                obs_config.set_all_high_dim(False)
                obs_config.set_all_low_dim(True)
            else:
                obs_config_dict = {
                    self.left_obv_key: obs_config.left_shoulder_camera,
                    self.right_obv_key: obs_config.right_shoulder_camera,
                    self.wrist_obv_key: obs_config.wrist_camera,
                    self.front_obv_key: obs_config.front_camera
                }
                assert self.observation_mode in obs_config_dict.keys()
                obs_config.set_all_high_dim(False)
                obs_config_dict[self.observation_mode].set_all(True)
                obs_config.set_all_low_dim(True)

            # TODO : Write code to change it from env_args
            action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
            self.env_obj = Environment(action_mode, obs_config=obs_config,
                                       headless=True)

            task = task if task else ReachTarget
            if type(task) == str:
                task = task.split('-')[0]
                task = self.env_obj._string_to_task(task)

            self.env_obj.launch()
            # NOTE : `env` is referred to as `task` in the RLBench docs.
            env = self.env_obj.get_task(task)
        return env

    def _get_obs(self, obs, key):
        assert obs is not None and key is not None
        if type(obs) == tuple:
            obs = obs[1]
        if type(obs) == dict:
            return obs[key]
        elif type(obs) == Observation:
            if key == 'state':
                return obs.get_low_dim_data()
            return getattr(obs, key)

    def shutdown_env(self):
        if self.env_obj is None:
            print("Environment not created, call `.get_env()`")
        elif self.use_gym:
            self.env_obj.close()
        else:
            self.env_obj.shutdown()
        self.env_obj = None

    def teleoperate(self, demons_config, task=None):
        if self.config.env_args['keyboard_teleop']:
            raise NotImplementedError
        else:
            if self.env_obj is None or task is None:
                env = self.get_env(task)
            else:
                if type(task) == str:
                    task = self.env_obj._string_to_task(task.split('-')[0])
                env = self.env_obj.get_task(task)

            if self.use_gym:
                demos = env.task.get_demos(demons_config.n_runs, live_demos=True)
            else:
                demos = env.get_demos(demons_config.n_runs, live_demos=True)
            demos = np.array(demos).flatten()

            for i in range(demons_config.n_runs):
                sample = demos[i]
                if self.observation_mode != 'state':
                    tr_vobvs = np.array([self._get_obs(obs, self.vis_obv_key)
                                         for obs in sample])
                tr_dof = np.array([self._get_obs(obs, self.dof_obv_key).flatten()
                                   for obs in sample])
                tr_actions = np.array([self._get_obs(obs, self.env_action_key).flatten()
                                       for obs in sample])
                tr_gripper = np.array([[self._get_obs(obs, self.env_gripper_key)]
                                       for obs in sample])

                if self.config.env_args['combine_action_space']:
                    tr_actions = np.concatenate((tr_actions, tr_gripper), axis=-1)

                print("Storing Trajectory")
                trajectory = {self.dof_obv_key: tr_dof, 'action': tr_actions}
                if self.observation_mode != 'state':
                    trajectory.update({self.vis_obv_key: tr_vobvs})
                store_trajectoy(trajectory, episode_type='teleop', task=task)
            self.shutdown_env()

    def random_trajectory(self, demons_config):
        env = self.get_env()
        obs = env.reset()
        tr_vobvs, tr_dof, tr_actions = [], [], []

        for step in range(demons_config.flush_freq):
            if self.observation_mode != 'state':
                tr_vobvs.append(np.array(self._get_obs(obs, self.vis_obv_key)))
            tr_dof.append(np.array(self._get_obs(obs, self.dof_obv_key).flatten()))

            # action_space is stored as an int dimension, so it is used
            # directly as the sample size
            action = np.random.normal(size=self.action_space)
            obs, reward, done, info = env.step(action)
            tr_actions.append(action)

        print("Storing Trajectory")
        trajectory = {self.dof_obv_key: np.array(tr_dof),
                      'action': np.array(tr_actions)}
        if self.observation_mode != 'state':
            trajectory.update({self.vis_obv_key: np.array(tr_vobvs)})
        store_trajectoy(trajectory, episode_type='random')
        self.shutdown_env()
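# Usage sketch (names assumed where the surrounding config framework is not
# shown; demons_config is whatever object the methods above expect): collect
# teleop demos for a task and then one random trajectory. Both methods
# persist data via store_trajectoy(...) and shut the env down afterwards.
group = RLBenchDataEnvGroup()
group.teleoperate(demons_config, task='reach_target-state-v0')
group.random_trajectory(demons_config)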
def __init__(self, task_class, n_features, load, n_movements):
    """
    Learn the movement from demonstration.

    :param task_class: Task that we aim to learn
    :param n_features: Number of RBFs in n_features
    :param load: Load from data
    :param n_movements: How many movements we want to learn
    """
    frequency = 200

    # To use 'saved' demos, set the path below, and set live_demos=False
    live_demos = not load
    DATASET = '' if live_demos else 'datasets'

    obs_config = ObservationConfig()
    obs_config.set_all_low_dim(True)
    obs_config.set_all_high_dim(False)
    self._task_class = task_class

    action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
    group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
    env = Environment(action_mode, DATASET, obs_config, headless=True)
    env.launch()

    task = env.get_task(task_class)

    trajectories = []
    states = []
    lengths = []
    print("Start Demo")
    demos = task.get_demos(n_movements, live_demos=live_demos)
    print("End Demo")
    init = True
    for demo in demos:
        trajectory = NamedTrajectory(*group.refs)
        t = 0
        for ob in demo:
            if t == 0:
                if init:
                    print("State dim: %d" % ob.task_low_dim_state.shape[0])
                    init = False
                states.append(ob.task_low_dim_state)
            kwargs = {"d%d" % i: ob.joint_positions[i]
                      for i in range(ob.joint_positions.shape[0])}
            kwargs["gripper"] = ob.gripper_open
            trajectory.notify(duration=1 / frequency, **kwargs)
            t += 1
        lengths.append(t / 200.)
        trajectories.append(trajectory)

    space = ClassicSpace(group, n_features=n_features, regularization=1E-15)
    z = np.linspace(-0.2, 1.2, 1000)
    Phi = space.get_phi(z)
    for i in range(n_features):
        plt.plot(z, Phi[:, i])
    plt.show()

    parameters = np.array([
        np.concatenate([s, np.array([l]),
                        LearnTrajectory(space, traj).get_block_params()])
        for s, l, traj in zip(states, lengths, trajectories)
    ])
    np.save("parameters/%s_%d.npy" % (task.get_name(), n_features), parameters)
    env.shutdown()
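# Sketch of reading back the file saved above (file name and state_dim are
# assumptions). Each row is np.concatenate([task_low_dim_state, [duration],
# block_params]), so it splits as follows; state_dim would be whatever the
# "State dim" print-out reported (e.g. 3, matching the [:, :3] slicing used
# by PPCAImitation later in this section).
import numpy as np

params = np.load("parameters/%s_%d.npy" % ("reach_target", 20))  # assumed names
state_dim = 3  # assumption
states = params[:, :state_dim]
durations = params[:, state_dim]
block_params = params[:, state_dim + 1:]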
class FakeRLBenchEnv(Environment):
    ROBOT_NAME = SUPPORTED_ROBOTS.keys()
    OBSERVATION_MODE = ("state", "vision", "all")
    ACTION_MODE = {
        "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY,
        "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY,
        "joint position": ArmActionMode.ABS_JOINT_POSITION,
        "delta joint position": ArmActionMode.DELTA_JOINT_POSITION,
        "joint torque": ArmActionMode.ABS_JOINT_TORQUE,
        "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE,
        "effector velocity": ArmActionMode.ABS_EE_VELOCITY,
        "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY,
        "effector position": ArmActionMode.ABS_EE_POSE,
        "delta effector position": ArmActionMode.DELTA_EE_POSE
    }

    def __init__(self, task_name: str, observation_mode: str = "state",
                 action_mode: str = "delta joint position",
                 robot_name: str = "panda"):
        super(FakeRLBenchEnv, self).__init__(task_name)
        if task_name not in all_class_names(tasks):
            raise KeyError(f"Error: unknown task name {task_name}")
        if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE:
            raise KeyError(
                f"Error: unknown observation mode {observation_mode}, "
                f"available: {FakeRLBenchEnv.OBSERVATION_MODE}")
        if action_mode not in FakeRLBenchEnv.ACTION_MODE:
            raise KeyError(
                f"Error: unknown action mode {action_mode}, "
                f"available: {FakeRLBenchEnv.ACTION_MODE.keys()}")
        if robot_name not in FakeRLBenchEnv.ROBOT_NAME:
            raise KeyError(
                f"Error: unknown robot name {robot_name}, "
                f"available: {FakeRLBenchEnv.ROBOT_NAME}")

        # TODO: modify the task/robot/arm/gripper to support early
        #       instantiation before v-rep is launched
        self._observation_mode = observation_mode
        self._action_mode = action_mode
        self._task_name = task_name
        self._robot_name = robot_name

        self._observation_config = ObservationConfig()
        if self._observation_mode == "state":
            self._observation_config.set_all_low_dim(True)
            self._observation_config.set_all_high_dim(False)
        elif self._observation_mode == "vision":
            self._observation_config.set_all_low_dim(False)
            self._observation_config.set_all_high_dim(True)
        elif self._observation_mode == "all":
            self._observation_config.set_all(True)

        self._action_config = ActionMode(
            FakeRLBenchEnv.ACTION_MODE[self._action_mode])

        self.env = None
        self.task = None
        self._update_info_dict()

    def init(self, display=False):
        with suppress_stdout():
            self.env = RLEnvironment(action_mode=self._action_config,
                                     obs_config=self._observation_config,
                                     headless=not display,
                                     robot_configuration=self._robot_name)
            self.env.launch()
            self.task = self.env.get_task(get_named_class(self._task_name, tasks))

    def reset(self, random: bool = True) -> StepDict:
        if not random:
            np.random.seed(0)
        self.task._static_positions = not random
        # Returns a list of descriptions and the first observation
        descriptions, obs = self.task.reset()
        next_step = {"opt": descriptions}
        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return next_step

    def step(self, last_step: StepDict) -> (StepDict, StepDict, bool):
        assert 'a' in last_step, \
            "Key 'a' for action not in last_step; did you pass the wrong dict?"
        obs, reward, terminate = self.task.step(last_step['a'])
        last_step['r'] = reward
        last_step["info"] = {}
        next_step = {"opt": None}
        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return last_step, next_step, terminate

    def finalize(self) -> bool:
        with suppress_stdout():
            self.env.shutdown()
        self.task = None
        self.env = None
        return True

    def name(self) -> str:
        return self._task_name

    # ------------- private methods ------------- #

    def _update_info_dict(self):
        # update info dict
        self._info["action mode"] = self._action_mode
        self._info["observation mode"] = self._observation_mode
        # TODO: action dim should relate to the robot, not the action mode;
        #       here we fix it temporarily
        self._info["action dim"] = (self._action_config.action_size,)
        self._info["action low"] = -np.ones(self._info["action dim"], dtype=np.float32)
        self._info["action high"] = np.ones(self._info["action dim"], dtype=np.float32)
        if self._observation_mode == "state" or self._observation_mode == "all":
            # TODO: observation should be determined without initialising the
            #       entire environment
            with suppress_stdout():
                env = RLEnvironment(action_mode=self._action_config,
                                    obs_config=self._observation_config,
                                    headless=True,
                                    robot_configuration=self._robot_name)
                env.launch()
                task = env.get_task(get_named_class(self._task_name, tasks))
                _, obs = task.reset()
                env.shutdown()
            del task
            del env
            self._info["time step"] = _DT
            self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
            self._info["state low"] = np.ones(self._info["state dim"],
                                              dtype=np.float32) * -np.inf
            self._info["state high"] = np.ones(self._info["state dim"],
                                               dtype=np.float32) * np.inf
        if self._observation_mode == "vision" or self._observation_mode == "all":
            self._info["left shoulder rgb dim"] = tuple(
                self._observation_config.left_shoulder_camera.image_size) + (3,)
            self._info["left shoulder rgb low"] = np.zeros(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["left shoulder rgb high"] = np.ones(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["right shoulder rgb dim"] = tuple(
                self._observation_config.right_shoulder_camera.image_size) + (3,)
            self._info["right shoulder rgb low"] = np.zeros(
                self._info["right shoulder rgb dim"], dtype=np.float32)
            self._info["right shoulder rgb high"] = np.ones(
                self._info["right shoulder rgb dim"], dtype=np.float32)
            self._info["wrist rgb dim"] = tuple(
                self._observation_config.wrist_camera.image_size) + (3,)
            self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
            self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
        self._info["reward low"] = -np.inf
        self._info["reward high"] = np.inf

    def live_demo(self, amount: int, random: bool = True) -> SampleBatch:
        """
        :param amount: number of demonstration trajectories to be generated
        :param random: whether the starting position is random
        :return: observation list: [amount x [(steps-1) x [s, a] + [s_term, None]]]
        WARNING: the action here is reconstructed from observations; when
        executed it may introduce some inaccuracy.
        """
        seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)]
        self.task._static_positions = not random
        demo_pack = []
        for seed in seeds:
            np.random.seed(seed)
            pack = self.task.get_demos(1, True)[0]
            demo_traj = []
            np.random.seed(seed)
            desc, obs = self.task.reset()
            v_tar = 0.
            for o_tar in pack[1:]:
                action = []
                if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY:
                    action.extend((o_tar.joint_positions - obs.joint_positions) / _DT)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION:
                    action.extend(o_tar.joint_positions)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces)
                    raise TypeError(
                        "Warning, abs_joint_torque is not currently supported")
                elif self._action_config.arm == ArmActionMode.ABS_EE_POSE:
                    action.extend(o_tar.gripper_pose)
                elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY:
                    # WARNING: this calculation is not very accurate, since
                    # rotations cannot simply be 'added' together; the original
                    # RLBench does it this way, so we follow it
                    action.extend((o_tar.gripper_pose - obs.gripper_pose) / _DT)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY:
                    v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT
                    action.extend(v_tar - obs.joint_velocities)
                    raise TypeError(
                        "Warning, delta_joint_velocity is not currently supported")
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION:
                    action.extend(o_tar.joint_positions - obs.joint_positions)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces - obs.joint_forces)
                    raise TypeError(
                        "Warning, delta_joint_torque is not currently supported")
                elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE:
                    action.extend(o_tar.gripper_pose[:3] - obs.gripper_pose[:3])
                    q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion(
                        obs.gripper_pose[3:7]).conjugate
                    action.extend(list(q))
                elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY:
                    # WARNING: this calculation is not very accurate, since
                    # rotations cannot simply be 'added' together; the original
                    # RLBench does it this way, so we follow it
                    v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT
                    action.extend(v_tar_new - v_tar)
                    v_tar = v_tar_new
                    raise TypeError(
                        "Warning, delta_ee_velocity is not currently supported")
                action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0)
                action = np.asarray(action, dtype=np.float32)
                demo_traj.append({'observation': obs, 'a': action,
                                  's': obs.get_low_dim_data()})
                obs, reward, done = self.task.step(action)
                demo_traj[-1]['r'] = reward
            demo_pack.append(demo_traj)
        return {
            "trajectory": demo_pack,
            "config": "default",
            "policy": "hand-coding",
            "env class": self.__class__.__name__,
            "env name": self._task_name,
            "env config": "default",
            "observation config": self._observation_mode,
            "robot config": self._robot_name,
            "action config": self._action_mode
        }
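# Usage sketch for the class above (an illustration; assumes "ReachTarget"
# exists in `tasks` and that `_info` is populated by the base class). Step
# dicts use the keys defined above: 'a' in, 's'/'r' out.
import numpy as np

env = FakeRLBenchEnv("ReachTarget", observation_mode="state",
                     action_mode="delta joint position")
env.init(display=False)
step = env.reset(random=True)
for _ in range(20):
    action = np.random.uniform(-0.1, 0.1, size=env._info["action dim"][0])
    last_step, step, done = env.step({'a': action})
    if done:
        break
env.finalize()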
class PPCAImitation:
    def __init__(self, task_class, state_dim, n_features, n_cluster, n_latent,
                 parameters=None, headless=False, cov_reg=1E-8, n_samples=50,
                 dense_reward=False, imitation_noise=0.03):
        obs_config = ObservationConfig()
        obs_config.set_all_low_dim(True)
        obs_config.set_all_high_dim(False)
        self._obs_config = obs_config
        self._state_dim = state_dim
        self._headless = headless
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
        self._task_class = task_class
        self._action_mode = action_mode
        self.env = Environment(action_mode, "", obs_config, headless=headless)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        if parameters is None:
            self.parameters = np.load(
                "parameters/%s_%d.npy" % (self.task.get_name(), n_features))[:n_samples]
        else:
            self.parameters = parameters
        # parameters = np.concatenate([parameters for _ in range(20)])
        self.imitation_noise = imitation_noise
        self.parameters[:, :3] += imitation_noise * np.random.normal(
            size=self.parameters[:, :3].shape)
        self.mppca = MPPCA(n_cluster, n_latent, n_iterations=30,
                           cov_reg=cov_reg, n_init=500)
        self.mppca.fit(self.parameters)
        self.rlmppca = None
        self.dense_reward = dense_reward
        print(np.exp(self.mppca.log_pi))
        group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
        self.space = ClassicSpace(group, n_features=n_features)
        print("mppca learned")

    def stop(self, joint_gripper, previous_reward):
        if previous_reward == 0.:
            success = 0.
            for _ in range(50):
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1.0:
                    success = 1.
                    break
            return self.task._task.get_dense_reward(), success
        return self.task._task.get_dense_reward(), 1.

    def collect_samples(self, n_episodes=50, noise=True, isomorphic_noise=False):
        success_list = []
        reward_list = []
        latent = []
        cluster = []
        observations = []
        parameters = []
        ob = self.task.reset()
        for i in range(n_episodes):
            context = ob[1].task_low_dim_state
            observations.append(context)
            w, z, k = self.rlmppca.generate_full(
                np.expand_dims(context, 0), noise=noise,
                isomorphic_noise=isomorphic_noise)
            # w = self.parameters[i, 3:]
            parameters.append(w)
            latent.append(z)
            cluster.append(k)
            mp = MovementPrimitive(
                self.space,
                MovementPrimitive.get_params_from_block(self.space, w[1:]))
            duration = 1 if w[0] < 0 else w[0]
            print(duration)
            if self._headless:
                trajectory = mp.get_full_trajectory(duration=duration, frequency=200)
            else:
                trajectory = mp.get_full_trajectory(duration=5 * duration,
                                                    frequency=200)
            tot_reward = 0.
            success = 0
            for a1 in trajectory.values:
                # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]):
                joint = a1  # (a2-a1)*20.
                joint_gripper = joint
                obs, reward, terminate = self.task.step(joint_gripper)
                if reward == 1. or terminate == 1.:
                    if reward == 1.:
                        success = 1.
                    break
            tot_reward, success = self.stop(joint_gripper, success)
            # tot_reward = -np.mean(np.abs(context - ob[1].gripper_pose[:3]))
            # tot_reward = -(w[0]-0.2)**2
            print(tot_reward)
            # print("my reward", -np.mean(np.abs(context - ob[1].gripper_pose[:3])))
            # print("parameters", parameters[-1])
            success_list.append(success)
            if self.dense_reward:
                reward_list.append(tot_reward)
            else:
                reward_list.append(success)
            ob = self.task.reset()
        print("-" * 50)
        print("Total reward", np.mean(reward_list))
        print("-" * 50)
        return np.array(success_list), np.array(reward_list), \
            np.array(parameters), np.array(latent), np.array(cluster), \
            np.array(observations)

    def run_episode(self):
        self.env = Environment(self._action_mode, "", self._obs_config,
                               headless=self._headless)
        self.env.launch()
        self.task = self.env.get_task(self._task_class)

    def shutdown(self):
        self.env.shutdown()
# task = env.get_task(StackBlocks)
# available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable
task = env.get_task(PlayJenga)

obj_pose_sensor = NoisyObjectPoseSensor(env)
descriptions, obs = task.reset()
agent = Agent(obs, obj_pose_sensor.get_poses())

while True:
    # Getting noisy object poses
    obj_poses = obj_pose_sensor.get_poses()

    # Getting various fields from obs
    current_joints = obs.joint_positions
    gripper_pose = obs.gripper_pose
    rgb = obs.wrist_rgb
    depth = obs.wrist_depth
    mask = obs.wrist_mask

    # Perform action and step simulation
    # action = agent.act(obs)
    action = agent.act(obs, obj_poses)
    obs, reward, terminate = task.step(action)

    # if terminate:
    #     break

env.shutdown()
class RLBenchEnv(gym.GoalEnv):
    """A gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state',
                 randomization_mode="none", rand_config=None, img_size=256,
                 special_start=[], fixed_grip=-1, force_randomly_place=False,
                 force_change_position=False, sparse=False, not_special_p=0,
                 ground_p=0, special_is_grip=False, altview=False,
                 procedural_ind=0, procedural_mode='same', procedural_set=[],
                 is_mesh_obs=False, blank=False, COLOR_TUPLE=[1, 0, 0]):
        # blank means an all-zeros agent observation.
        self.blank = blank
        # altview controls a second camera angle: True/False, or "both" to
        # concatenate the two observations.
        self.altview = altview
        self.img_size = img_size
        self.sparse = sparse
        self.task_class = task_class
        self._observation_mode = observation_mode
        self._randomization_mode = randomization_mode
        # special_start is a list of actions to take at the beginning.
        self.special_start = special_start
        # fixed_grip is a temporary hack for keeping the gripper a certain
        # way; change later. 0 for fixed closed, 0.1 for fixed open, -1 for
        # not fixed.
        self.fixed_grip = fixed_grip
        # force the task to be randomly placed
        self.force_randomly_place = force_randomly_place
        # force the task to change position in addition to rotation
        self.force_change_position = force_change_position

        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision' or observation_mode == "visiondepth" \
                or observation_mode == "visiondepthmask":
            # obs_config.set_all(True)
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' % observation_mode)

        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_NOQ)
        print("using delta pose plan")

        if randomization_mode == "random":
            objs = ['target', 'boundary', 'Floor', 'Roof', 'Wall1', 'Wall2',
                    'Wall3', 'Wall4', 'diningTable_visible']
            if rand_config is None:
                assert False
            self.env = DomainRandomizationEnvironment(
                action_mode, obs_config=obs_config, headless=True,
                randomize_every=RandomizeEvery.EPISODE, frequency=1,
                visual_randomization_config=rand_config)
        else:
            self.env = Environment(action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        # Probability that the pick-and-lift task starts the gripper off at a
        # certain location (should probably be called non_special_p).
        self.task._task.not_special_p = not_special_p
        # Probability that the goal is on the ground.
        self.task._task.ground_p = ground_p
        # For the "special" case, whether to grip the object or just hover
        # above it.
        self.task._task.special_is_grip = special_is_grip
        # for the procedural env
        self.task._task.procedural_ind = procedural_ind
        # procedural mode: same, increase, or random.
        self.task._task.procedural_mode = procedural_mode
        # ideally a list-like object that dictates the indices to sample from
        # each episode.
        self.task._task.procedural_set = procedural_set
        # whether the state obs is a mesh obs
        self.task._task.is_mesh_obs = is_mesh_obs
        self.task._task.sparse = sparse
        self.task._task.COLOR_TUPLE = COLOR_TUPLE

        _, obs = self.task.reset()

        cam_placeholder = Dummy('cam_cinematic_placeholder')
        cam_pose = cam_placeholder.get_pose().copy()
        # custom pose
        cam_pose = [1.59999931, 0., 2.27999949,
                    0.65328157, -0.65328145, -0.27059814, 0.27059814]
        cam_pose[0] = 1
        cam_pose[2] = 1.5
        self.frontcam = VisionSensor.create([img_size, img_size])
        self.frontcam.set_pose(cam_pose)
        self.frontcam.set_render_mode(RenderMode.OPENGL)
        self.frontcam.set_perspective_angle(60)
        self.frontcam.set_explicit_handling(1)

        self.frontcam_mask = VisionSensor.create([img_size, img_size])
        self.frontcam_mask.set_pose(cam_pose)
        self.frontcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
        self.frontcam_mask.set_perspective_angle(60)
        self.frontcam_mask.set_explicit_handling(1)

        if altview:
            alt_pose = [0.25, -0.65, 1.5, 0, 0.93879825, 0.34169483, 0]
            self.altcam = VisionSensor.create([img_size, img_size])
            self.altcam.set_pose(alt_pose)
            self.altcam.set_render_mode(RenderMode.OPENGL)
            self.altcam.set_perspective_angle(60)
            self.altcam.set_explicit_handling(1)

            self.altcam_mask = VisionSensor.create([img_size, img_size])
            self.altcam_mask.set_pose(alt_pose)
            self.altcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED)
            self.altcam_mask.set_perspective_angle(60)
            self.altcam_mask.set_explicit_handling(1)

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,))

        if observation_mode == 'state':
            self.observation_space = spaces.Dict({
                "observation": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=self.task._task.get_state_obs().shape),
                "achieved_goal": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=self.task._task.get_achieved_goal().shape),
                "desired_goal": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=self.task._task.get_desired_goal().shape),
            })
        # Use the front-vision cam
        elif observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                "observation": spaces.Box(
                    low=0, high=1,
                    shape=self.frontcam.capture_rgb().transpose(2, 0, 1).flatten().shape),
            })
            if altview == "both":
                example = self.frontcam.capture_rgb().transpose(2, 0, 1).flatten()
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        low=0, high=1, shape=np.array([example, example]).shape),
                })
        elif observation_mode == 'visiondepth':
            self.frontcam.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                # thinking about not flattening the shape, because this is
                # primarily used for the dataset and not for training
                "observation": spaces.Box(
                    low=0, high=1,
                    shape=np.array([self.frontcam.capture_rgb().transpose(2, 0, 1),
                                    self.frontcam.capture_depth()[None, ...]]).shape),
            })
        elif observation_mode == 'visiondepthmask':
            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                # thinking about not flattening the shape, because this is
                # primarily used for the dataset and not for training
                "observation": spaces.Box(
                    low=0, high=1,
                    shape=np.array([self.frontcam.capture_rgb().transpose(2, 0, 1),
                                    self.frontcam.capture_depth()[None, ...],
                                    rgb_handles_to_mask(
                                        self.frontcam_mask.capture_rgb())]).shape),
            })
            if altview == "both":
                self.observation_space = spaces.Dict({
                    "state": spaces.Box(
                        low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                    "observation": spaces.Box(
                        low=0, high=1,
                        shape=np.array([self.frontcam.capture_rgb().transpose(2, 0, 1),
                                        self.frontcam.capture_rgb().transpose(2, 0, 1),
                                        self.frontcam.capture_depth()[None, ...],
                                        rgb_handles_to_mask(
                                            self.frontcam_mask.capture_rgb())]).shape),
                })
        self._gym_cam = None

    # GoalEnv
    def compute_reward(self, achieved_goal, desired_goal, info):
        reward = self.task._task.compute_reward(achieved_goal, desired_goal, info)
        return reward

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return {
                'observation': self.task._task.get_state_obs(),
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal': self.task._task.get_desired_goal(),
            }
        elif self._observation_mode == 'vision':
            self.frontcam.handle_explicitly()
            if self.altview:
                self.altcam.handle_explicitly()
                second_view = self.altcam.capture_rgb().transpose(2, 0, 1).flatten()
                if self.altview == "both":
                    return {
                        'achieved_goal': self.task._task.get_achieved_goal(),
                        'desired_goal': self.task._task.get_desired_goal(),
                        'save_state': self.task._task.get_save_state(),
                        "observation": np.array(
                            [self.frontcam.capture_rgb().transpose(2, 0, 1).flatten(),
                             second_view])
                    }
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal': self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": self.altcam.capture_rgb().transpose(2, 0, 1).flatten()
                }
            if self.blank:
                return {
                    'achieved_goal': self.task._task.get_achieved_goal(),
                    'desired_goal': self.task._task.get_desired_goal(),
                    'save_state': self.task._task.get_save_state(),
                    "observation": np.zeros(self.observation_space['observation'].shape)
                }
            return {
                'achieved_goal': self.task._task.get_achieved_goal(),
                'desired_goal': self.task._task.get_desired_goal(),
                'save_state': self.task._task.get_save_state(),
                "observation": self.frontcam.capture_rgb().transpose(2, 0, 1).flatten(),
            }
        elif self._observation_mode == 'visiondepth':
            self.frontcam.handle_explicitly()
            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2, 0, 1),
                                self.frontcam.capture_depth()[None, ...]]
            }
        elif self._observation_mode == 'visiondepthmask':
            self.frontcam.handle_explicitly()
            self.frontcam_mask.handle_explicitly()
            mask = np.array(self.frontcam_mask.capture_rgb())
            if self.altview:
                self.altcam.handle_explicitly()
                self.altcam_mask.handle_explicitly()
                altmask = np.array(self.altcam_mask.capture_rgb())
                second_view = self.altcam.capture_rgb().transpose(2, 0, 1)
                if self.altview == "both":
                    assert False  # no use case for this
                    return {
                        "state": obs.get_low_dim_data(),
                        "observation": [self.frontcam.capture_rgb().transpose(2, 0, 1),
                                        self.altcam.capture_rgb().transpose(2, 0, 1).flatten()]
                    }
                return {
                    "state": obs.get_low_dim_data(),
                    "observation": [self.altcam.capture_rgb().transpose(2, 0, 1),
                                    self.altcam.capture_depth()[None, ...],
                                    altmask]
                }
            return {
                "state": obs.get_low_dim_data(),
                "observation": [self.frontcam.capture_rgb().transpose(2, 0, 1),
                                self.frontcam.capture_depth()[None, ...],
                                mask]
            }

    def render(self, mode='human'):
        self.frontcam.handle_explicitly()
        return self.frontcam.capture_rgb()

    def reset(self):
        _, obs = self.task.reset(force_randomly_place=self.force_randomly_place,
                                 force_change_position=self.force_change_position)
        if len(self.special_start) != 0:
            for a in self.special_start:
                self.step(a)
        # Doesn't actually use obs.
        return self._extract_obs(obs)

    def step(self, action):
        if self.fixed_grip >= 0:
            action[3] = self.fixed_grip
        obs, reward, terminate = self.task.step(action)
        reward, done = self.task._task.get_reward_and_done(sparse=self.sparse)
        return self._extract_obs(obs), reward, done, {"is_success": int(done)}

    def close(self):
        self.env.shutdown()

    def rerender(self, state):
        self.task._task.restore_full_state(state)
        return self.render().transpose(2, 0, 1).flatten()

    def resample_step(self, state, action, sampled_goal):
        self.task._task.restore_save_state(state)
        Shape('target').set_position(sampled_goal)
        if self.altview == "both":
            self.frontcam.handle_explicitly()
            self.altcam.handle_explicitly()
            o_before = np.array(
                [self.frontcam.capture_rgb().transpose(2, 0, 1).flatten(),
                 self.altcam.capture_rgb().transpose(2, 0, 1).flatten()])
        elif self.altview:
            self.altcam.handle_explicitly()
            o_before = self.altcam.capture_rgb().transpose(2, 0, 1).flatten()
        else:
            if self.blank:
                o_before = np.zeros(self.observation_space['observation'].shape)
            else:
                o_before = self.render().transpose(2, 0, 1).flatten()
        o, r, d, _ = self.step(action)
        o_after = o['observation']
        return o_before, r, d, o_after
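# Sketch of HER-style goal relabeling with the GoalEnv wrapper above (an
# illustration; task_class is assumed to be a goal-based RLBench task, and
# compute_reward() defers to whatever the task defines).
env = RLBenchEnv(task_class, observation_mode='state')
obs = env.reset()
new_obs, reward, done, info = env.step(env.action_space.sample())
# Pretend the achieved goal was the desired one and recompute the reward.
relabeled_r = env.compute_reward(new_obs['achieved_goal'],
                                 new_obs['achieved_goal'], info)
env.close()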
class RLBenchEnv(gym.Env):
    """A gym wrapper for RLBench."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, task_class, observation_mode='state',
                 render_mode: Union[None, str] = None):
        self._observation_mode = observation_mode
        self._render_mode = render_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' % observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb": spaces.Box(
                    low=0, high=1, shape=obs.wrist_rgb.shape),
                "front_rgb": spaces.Box(
                    low=0, high=1, shape=obs.front_rgb.shape),
            })

        if render_mode is not None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if render_mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs) -> Dict[str, np.ndarray]:
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
                "front_rgb": obs.front_rgb,
            }

    def render(self, mode='human') -> Union[None, np.ndarray]:
        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' % (mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self) -> Dict[str, np.ndarray]:
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]:
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, {}

    def close(self) -> None:
        self.env.shutdown()
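# Usage sketch, following the error message in render() above: this wrapper
# is meant to be reached through gym registration, so assuming an id like
# "reach_target-state-v0" is registered, usage would look like this.
import gym

env = gym.make("reach_target-state-v0", render_mode="human")
obs = env.reset()
for _ in range(100):
    obs, reward, done, _ = env.step(env.action_space.sample())
    env.render()  # human mode renders via the windowed OPENGL3 camera
    if done:
        break
env.close()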
class RLBenchEnv(gym.Env):
    """A gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state'):
        self._observation_mode = observation_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' % observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32)
        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb": spaces.Box(
                    low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb": spaces.Box(
                    low=0, high=1, shape=obs.wrist_rgb.shape),
            })
        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'vision':
            return {
                "state": obs.get_low_dim_data(),
                "left_shoulder_rgb": obs.left_shoulder_rgb,
                "right_shoulder_rgb": obs.right_shoulder_rgb,
                "wrist_rgb": obs.wrist_rgb,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)

    def reset(self):
        descriptions, obs = self.task.reset()
        del descriptions  # Not used.
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        return self._extract_obs(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
class RLBenchEnv(gym.Env):
    """A gym wrapper for RLBench (snake-robot fork: custom ActionConfig /
    SnakeRobotActionConfig API, action skipping and goal-based observations)."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state',
                 action_mode='joint', multi_action_space=False,
                 discrete_action_space=False):
        # Repeat each chosen action for this many control steps and stack
        # the resulting observations into a single policy step.
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None
        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError(
                'Unrecognised observation_mode: %s.' % observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(
            action_config=self.ac_config, obs_config=self.obs_config,
            headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7, high=1.7,
                shape=(self.ac_config.action_size,), dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(
                    low=low1, high=high1, dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(
                    low=low2, high=high2, dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(
                    low=low, high=high, dtype=np.float32)

        if observation_mode in ('state', 'state-goal'):
            # Observations from the skipped control steps are concatenated,
            # hence the factor of num_skip_control.
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(obs.get_low_dim_data().shape[0]
                       * self.num_skip_control,))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb": spaces.Box(
                    low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth": spaces.Box(
                    low=0, high=1, shape=obs.head_camera_depth.shape),
            })
        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                # Key fixed to match the observation space above
                # (was misspelt "rattle_eye_depth").
                "rattler_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # Add the camera to the scene
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {
            'observation': [], 'desired_goal': None, 'achieved_goal': None}
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        # Replicate the initial observation so the stacked shape matches
        # what step() produces over num_skip_control control steps.
        for _ in range(self.num_skip_control):
            if isinstance(obs_data, (list, np.ndarray)):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used.
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {
            'observation': [], 'desired_goal': None, 'achieved_goal': None}
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, (list, np.ndarray)):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                # Used by goal-conditioned / hierarchical RL algorithms.
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:
                    # Record a successful experience.
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None,
                       info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = ('./obs_record/obs_record_'
                    + str(self.obs_record_id) + '.txt')
        obs_record_file = open(filename, 'w')
        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            # The last line records the target position.
            robot_pos_arr.append(target_pos)
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()
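# --- Usage sketch (not executed): goal-conditioned rollouts with the
# snake-robot wrapper above. This fork's task classes are not part of stock
# RLBench, so the task class is passed in by the caller; compute_reward
# mirrors the gym.GoalEnv interface used by HER-style algorithms.
def _demo_snake_goal_env(task_class):
    env = RLBenchEnv(task_class, observation_mode='state-goal',
                     action_mode='trigon')
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    # Recompute the reward for a relabelled goal, as HER would.
    relabelled = env.compute_reward(
        achieved_goal=obs['achieved_goal'],
        desired_goal=obs['achieved_goal'])
    env.close()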
class RLBenchEnv():
    """Make an RLBench env expose the same interfaces as openai.gym."""

    def __init__(self, task_name: str,
                 state_type: Union[str, list] = 'state'):
        # render_mode=None):
        """
        Create an RLBench environment.

        :param task_name: task names can be found in rlbench.tasks
        :param state_type: 'state', 'vision', or a sub-list of the
            observation attribute names, e.g. ['left_shoulder_rgb']
        """
        if (state_type == 'state' or state_type == 'vision'
                or isinstance(state_type, list)):
            self._state_type = state_type
        else:
            raise ValueError(
                'State type value error, your value is {}'.format(state_type))
        # self._render_mode = render_mode
        self._render_mode = None
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        try:
            # Resolve the task class by name; this requires the task classes
            # to be imported into this module (e.g. from rlbench.tasks).
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError
        _, obs = self.task.reset()
        self.spec = Spec(task_name)

        if self._state_type == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif self._state_type == 'vision':
            space_dict = OrderedDict()
            space_dict["state"] = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
            for i in ["left_shoulder_rgb", "right_shoulder_rgb",
                      "wrist_rgb", "front_rgb"]:
                space_dict[i] = spaces.Box(
                    low=0, high=1, shape=getattr(obs, i).shape)
            self.observation_space = spaces.Dict(space_dict)
        else:
            space_dict = OrderedDict()
            for name in self._state_type:
                # Fixed: the suffix test used `in ('rgb')`, which is string
                # membership, not a one-element tuple.
                if name.split('_')[-1] == 'rgb':
                    space_dict[name] = spaces.Box(
                        low=0, high=1, shape=getattr(obs, name).shape)
                elif name.split('_')[-1] in ('depth', 'mask'):
                    # Depth and mask maps are zero-padded to 3 channels
                    # in _extract_obs below.
                    space_dict[name] = spaces.Box(
                        low=0, high=1, shape=(128, 128, 3))
                    # print(space_dict[name].shape)
                else:
                    space_dict[name] = spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=getattr(obs, name).shape)
            self.observation_space = spaces.Dict(space_dict)

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,),
            dtype=np.float32)

        # if render_mode is not None:
        #     # Add the camera to the scene
        #     cam_placeholder = Dummy('cam_cinematic_placeholder')
        #     self._gym_cam = VisionSensor.create([640, 360])
        #     self._gym_cam.set_pose(cam_placeholder.get_pose())
        #     if render_mode == 'human':
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
        #     else:
        #         self._gym_cam.set_render_mode(RenderMode.OPENGL3)

    def _extract_obs(self, obs):
        if self._state_type == 'state':
            return np.array(obs.get_low_dim_data(), np.float32)
        elif self._state_type == 'vision':
            # Note: the entries have different shapes, so this builds a
            # ragged (object-dtype) array.
            return np.array([
                np.array(obs.get_low_dim_data(), np.float32),
                np.array(obs.left_shoulder_rgb, np.float32),
                np.array(obs.right_shoulder_rgb, np.float32),
                np.array(obs.wrist_rgb, np.float32),
                np.array(obs.front_rgb, np.float32),
            ])
        else:
            result = []
            for name in self._state_type:
                if name.split('_')[-1] in ('mask', 'depth'):
                    # Pad single-channel maps with zeros to (128, 128, 3).
                    a = np.expand_dims(
                        np.array(getattr(obs, name), np.float32), axis=-1)
                    a = np.concatenate(
                        (a, np.zeros((128, 128, 2), dtype=a.dtype)), axis=2)
                    result.append(a)
                else:
                    result.append(np.array(getattr(obs, name), np.float32))
            return np.array(result)

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self, mode='human'):
        # TODO: make render available at any time
        if self._render_mode is None:
            self._render_mode = mode
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)
        if mode != self._render_mode:
            raise ValueError(
                'The render mode must match the render mode selected in the '
                'constructor. \nI.e. if you want "human" render mode, then '
                'create the env by calling: '
                'gym.make("reach_target-state-v0", render_mode="human").\n'
                'You passed in mode %s, but expected %s.' % (
                    mode, self._render_mode))
        if mode == 'rgb_array':
            return self._gym_cam.capture_rgb()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._extract_obs(obs)

    def step(self, action):
        obs, reward, terminate = self.task.step(action)
        # Debug visualisation of the camera observations (requires PIL):
        # img = Image.fromarray(np.uint8(
        #     np.array(obs.left_shoulder_rgb, np.float32) * 255), 'RGB')
        # img.show()
        # a = np.array(obs.left_shoulder_mask, np.float32)
        # img = Image.fromarray(np.uint8(a * 255), 'L')
        # img.show()
        # a = np.array(obs.left_shoulder_depth, np.float32)
        # img = Image.fromarray(np.uint8(a * 255), 'L')
        # img.show()
        # input('> ')
        # gym's step contract expects an info dict, not None.
        return self._extract_obs(obs), reward, terminate, {}

    def close(self):
        self.env.shutdown()
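# --- Usage sketch (not executed): the wrapper above resolves the task class
# from a string, so the task must be importable from this module (e.g. via
# `from rlbench.tasks import *`). A minimal sketch with a custom state_type
# list; depth entries come back zero-padded to (128, 128, 3).
def _demo_state_type_env():
    env = RLBenchEnv('ReachTarget',
                     state_type=['joint_positions', 'left_shoulder_depth'])
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    env.close()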
class RLBenchEnv(object):
    """Make an RLBench env expose the same interfaces as openai.gym, with a
    shaped reward for ReachTarget (distance, speed and orientation penalties
    on top of the sparse success reward)."""

    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=headless)
        self.env.launch()
        try:
            # task_name is kept for interface compatibility; the task is
            # hard-coded to ReachTarget here.
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError
        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                # Scalar observations get a (1,)-shaped Box.
                if isinstance(state, (int, float)):
                    shape = (1,)
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)
        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    # change (H, W, C) to (C, H, W) for torch
                    image = np.moveaxis(image, 2, 0)
                    state.append(image)
                elif state_type in primitive_types:
                    state.append(np.array([getattr(obs, state_type)]))
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary for success or not
        obs, reward, terminate = self.task.step(action)

        # Distance between current position (gripper) and target position.
        # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z)
        reward -= 1. * distance

        # Speed regulation: penalise large joint velocities.
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.05 / len(joint_velocities) * velocities

        # Orientation: penalise misalignment between the gripper-to-target
        # vector and the gripper's rotation axis, recovered from the
        # quaternion (note: singular when qw is close to 1).
        (gripper_x, gripper_y, gripper_z,
         gripper_qx, gripper_qy, gripper_qz,
         gripper_qw) = self.task._robot.gripper.get_pose()
        v1 = np.array(
            (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z))
        temp = np.sin(np.arccos(gripper_qw))
        v2 = np.array(
            (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp))
        orientation_angle = np.abs(np.arccos(
            v1.dot(v2) / np.sqrt(v1.dot(v1))
            / np.sqrt(v2.dot(v2))))  # [0, pi]
        if orientation_angle > np.pi / 2.:
            orientation_angle = np.pi - orientation_angle
        reward -= 0.5 / np.pi * orientation_angle
        # if orientation_angle > np.pi / 8. else 0.

        # gym's step contract expects an info dict, not None.
        return self._get_state(obs), reward, terminate, {}

    def close(self):
        self.env.shutdown()
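# --- Usage sketch (not executed): the wrapper above adds dense shaping
# terms (L1 distance to target, squared joint speeds, gripper orientation)
# to RLBench's sparse success reward, giving the agent a learning signal
# before its first success. A minimal sketch assuming `image_types` and
# `primitive_types` list the corresponding Observation attribute names.
def _demo_shaped_reach():
    env = RLBenchEnv('ReachTarget',
                     state_type_list=['joint_positions',
                                      'left_shoulder_rgb'],
                     headless=True)
    state = env.reset()  # list: one array per requested state type
    state, reward, done, info = env.step(env.action_space.sample())
    env.close()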
class RLBenchEnv(object):
    """Make an RLBench env expose the same interfaces as openai.gym, with a
    lighter shaped reward (distance and speed penalties only)."""

    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=headless)
        self.env.launch()
        try:
            # task_name is kept for interface compatibility; the task is
            # hard-coded to ReachTarget here.
            self.task = self.env.get_task(ReachTarget)
        except:
            raise NotImplementedError
        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')

        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)
        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # set seed as in openai.gym env
        pass

    def render(self):
        # render the scene
        pass

    def _get_state(self, obs):
        # state = np.hstack((obs.joint_positions, obs.joint_velocities,
        #                    obs.joint_forces))
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    # change (H, W, C) to (C, H, W) for torch
                    image = np.moveaxis(image, 2, 0)
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        # self.task._task.target.set_pose(np.array(
        #     [0.354685515165329, -0.025212552398443222, 1.0284103155136108,
        #      0.0, 0.0, 0.0, 1.0]))
        return self._get_state(obs)

    def step(self, action):
        # reward in original rlbench is binary for success or not
        obs, reward, terminate = self.task.step(action)

        # Distance between current position (tip) and target position.
        x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z)
        reward -= 0.5 * distance

        # Speed regulation: penalise large joint velocities.
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.1 / len(joint_velocities) * velocities

        # gym's step contract expects an info dict, not None.
        return self._get_state(obs), reward, terminate, {}

    def close(self):
        self.env.shutdown()
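# --- Usage sketch (not executed): same interface as the variant above it,
# but with lighter shaping (distance weight 0.5, velocity weight 0.1, no
# orientation term), a reasonable starting point when stronger penalties
# would dominate the sparse success signal.
def _demo_simple_shaped_reach():
    env = RLBenchEnv('ReachTarget', state_type_list=['joint_positions'],
                     headless=True)
    state = env.reset()
    for _ in range(10):
        state, reward, done, info = env.step(env.action_space.sample())
        if done:
            break
    env.close()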