class RLBenchEnv(): """ make RLBench env to have same interfaces as openai.gym """ def __init__(self, task_name, state_type_list=['left_shoulder_rgb']): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment( action_mode, obs_config=obs_config, headless=False) self.env.launch() try: self.task = self.env.get_task(getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() if len(state_type_list)>0: self.observation_space=[] for state_type in state_type_list: state=getattr(obs, state_type) self.observation_space.append(spaces.Box(low=-np.inf, high=np.inf, shape=state.shape)) else: raise ValueError('No State Type!') self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size,), dtype=np.float32) self.state_type_list = state_type_list self.spec = Spec(task_name) def seed(self, seed_value): # set seed as in openai.gym env pass def render(self): # render the scene pass def _get_state(self, obs): if len(self.state_type_list)>0: state=[] for state_type in self.state_type_list: if state_type in image_types: image = getattr(obs, state_type) image=np.moveaxis(image, 2, 0) # change (H, W, C) to (C, H, W) for torch state.append(image) else: state.append(getattr(obs, state_type)) else: raise ValueError('State Type Not Exists!') return state def reset(self): descriptions, obs = self.task.reset() return self._get_state(obs) def step(self, action): obs_, reward, terminate = self.task.step(action) # reward in original rlbench is binary for success or not return self._get_state(obs_), reward, terminate, None def close(self): self.env.shutdown()
def collect_demos(env: Environment, task: Task, taskname, n_iter_per_var=50, n_demos_per_iter=1000): env.launch() task_env = env.get_task(task) for variation_index in range(1): #task_env.variation_count()): # set variation task_env.set_variation(variation_index) description, _ = task_env.reset() # collect and save demos start = time.time() for i in range(n_iter_per_var): np.random.seed(6) demos = task_env.get_demos(n_demos_per_iter, live_demos=True, max_attempts=1) demos = np.array(demos).flatten() print(demos.shape) save_to_hdf5(demos, taskname, description, variation_index) print("TIME: ", time.time() - start) # env.shutdown() return demos
class SimulationEnvionment(): """ This can be a parent class from which we can have multiple child classes that can diversify for different tasks and deeper functions within the tasks. """ def __init__(self,\ action_mode=DEFAULT_ACTION_MODE,\ task=DEFAULT_TASK,\ dataset_root='',\ headless=True): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = action_mode self.env = Environment(action_mode, dataset_root, obs_config=obs_config, headless=headless) # Dont need to call launch as task.get_task can launch env. self.task = self.env.get_task(task) _, obs = self.task.reset() self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size, ), dtype=np.float32) self.logger = logger.create_logger(__class__.__name__) self.logger.propagate = 0 def _get_state(self, obs: Observation): """ This will be a hook over the environment to get the state. This will ensure that if changes need to be made to attributes of `Observation` then they can be made in a modular way. """ raise NotImplementedError() def reset(self): descriptions, obs = self.task.reset() return self._get_state(obs) def step(self, action): obs_, reward, terminate = self.task.step( action) # reward in original rlbench is binary for success or not return self._get_state(obs_), reward, terminate def shutdown(self): self.logger.info( "Environment Shutdown! Create New Instance If u want to start again" ) self.logger.handlers.pop() self.env.shutdown() def get_demos(self, num_demos): """ Should be implemented by child classes because other extra properties needed before training can be accessed via that. """ raise NotImplementedError()
def sub_proc(child): action_mode = ActionMode() env = Environment(action_mode, headless=child) env.launch() task = env.get_task(CloseMicrowave) descriptions, obs = task.reset() for i in range(1000): obs, reward, terminate = task.step(np.random.normal(np.zeros(action_mode.action_size)))
def act(graph,model,sess): obs_config = ObservationConfig() # obs_config.set_all(True) # 这几个参数用来关闭摄像头 obs_config.right_shoulder_camera.set_all(False) obs_config.wrist_camera.set_all(False) obs_config.left_shoulder_camera.set_all(False) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment( action_mode, obs_config=obs_config, headless=False) env.launch() task = env.get_task(ReachTarget) training_steps = 150 episode_length = 60 obs = None gripper = [1.0] # 爪子保持打开 for i in range(training_steps): # 初始化一次任务,包括初始Obs数据 if i % episode_length == 0: print('Reset Episode') descriptions, obs = task.reset() print(descriptions) # 网络的输入 vision = obs.front_rgb vision = vision[np.newaxis,:] state = obs.joint_positions state = state[np.newaxis,:] action = None feed_dict = { model.vision:vision, model.state:state, model.action:action } run_ops = [model.predict_out] with graph.as_default(): res = sess.run(run_ops,feed_dict=feed_dict) # print(res[0][0].shape) # print(gripper.shape) # input('check shape') action = np.concatenate([res[0][0],gripper],axis=-1) obs, reward, terminate = task.step(action) if reward == 1: print("successful try!") print('Done') env.shutdown()
def simulation(): # See rlbench/action_modes.py for other action modes action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode_v, '', ObservationConfig(), False) task = env.get_task(EmptyContainer) obj_pose_sensor = NoisyObjectPoseSensor(env) descriptions, obs = task.reset() allPoses = obj_pose_sensor.get_poses() all_object_poses = [ allPoses["Shape0"], allPoses["Shape1"], allPoses["Shape4"] ] all_object_poses.sort( key=lambda x: np.linalg.norm(x[:3] - allPoses["large_container"][:3])) agent = RandomAgent(allPoses["large_container"], allPoses["small_container0"], env) print(descriptions) obj_poses = obj_pose_sensor.get_poses() training_steps = 10000 episode_length = 1 i = 0 while i < training_steps: # Getting noisy object poses obj_poses = obj_pose_sensor.get_poses() mask = env._scene._cam_wrist_mask.capture_rgb() depth = (obs.wrist_depth, obs.wrist_rgb, mask) if agent.isResetting: #if agent._graspState == 1: #q.put(depth) nextState = agent.resetAct(obs, obj_poses) obs, reward, terminate = task.step(nextState) if not agent.isResetting: task._action_mode = action_mode_v env._action_mode = action_mode_v env._set_arm_control_action() else: if i > 0 and i % episode_length == 0: print(i, 'Reset Episode') agent.isResetting = True task._action_mode = action_mode env._action_mode = action_mode env._set_arm_control_action() agent.updateV() continue oldState, action = agent.act(obs) obs, reward, terminate = task.step(action) print(reward) nextState = np.zeros(8) nextState[:7] = obs.joint_positions nextState[7] = obs.gripper_open > 0.5 agent.updateQ(oldState, action, nextState, reward) i += 1 env.shutdown()
def _update_info_dict(self): # update info dict self._info["action mode"] = self._action_mode self._info["observation mode"] = self._observation_mode # TODO: action dim should related to robot, not action mode, here we fixed it temporally self._info["action dim"] = (self._action_config.action_size, ) self._info["action low"] = -np.ones(self._info["action dim"], dtype=np.float32) self._info["action high"] = np.ones(self._info["action dim"], dtype=np.float32) if self._observation_mode == "state" or self._observation_mode == "all": # TODO: observation should be determined without init the entire environment with suppress_stdout(): env = RLEnvironment(action_mode=self._action_config, obs_config=self._observation_config, headless=True, robot_configuration=self._robot_name) env.launch() task = env.get_task(get_named_class(self._task_name, tasks)) _, obs = task.reset() env.shutdown() del task del env self._info["time step"] = _DT self._info["state dim"] = tuple(obs.get_low_dim_data().shape) self._info["state low"] = np.ones(self._info["state dim"], dtype=np.float32) * -np.inf self._info["state high"] = np.ones(self._info["state dim"], dtype=np.float32) * np.inf if self._observation_mode == "vision" or self._observation_mode == "all": self._info["left shoulder rgb dim"] = tuple( self._observation_config.left_shoulder_camera.image_size) + ( 3, ) self._info["left shoulder rgb low"] = np.zeros( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["left shoulder rgb high"] = np.ones( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb dim"] = tuple( self._observation_config.right_shoulder_camera.image_size) + ( 3, ) self._info["right shoulder rgb low"] = np.zeros( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb high"] = np.ones( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["wrist rgb dim"] = tuple( self._observation_config.wrist_camera.image_size) + (3, ) self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"], dtype=np.float32) self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"], dtype=np.float32) self._info["reward low"] = -np.inf self._info["reward high"] = np.inf
def simulation(q): # See rlbench/action_modes.py for other action modes withLearning = False action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) if withLearning: env = Environment(action_mode_v, '', ObservationConfig(), False) else: env = Environment(action_mode, '', ObservationConfig(), False) task = env.get_task(EmptyContainer) obj_pose_sensor = NoisyObjectPoseSensor(env) descriptions, obs = task.reset() allPoses = obj_pose_sensor.get_poses() agent = RandomAgent(allPoses["large_container"], allPoses["small_container0"], env) print(descriptions) obj_poses = obj_pose_sensor.get_poses() training_steps = 10000 episode_length = 100 i = 0 rewardFunc = Reward(env, ['Shape0', 'Shape2', 'Shape4'], "small_container0") while i < training_steps: # Getting noisy object poses obj_poses = obj_pose_sensor.get_poses() mask = env._scene._cam_wrist_mask.capture_rgb() data = (obs.wrist_depth, obs.wrist_rgb, mask) if withLearning: if i > 0 and i % episode_length == 0: print(i, 'Reset Episode') agent.updateV() i += 1 continue oldState, action = agent.act(obs) obs, _, terminate = task.step(action) reward = rewardFunc.evaluate(obs) print(reward) nextState = np.zeros(8) nextState[:7] = obs.joint_positions nextState[7] = obs.gripper_open > 0.5 agent.updateQ(oldState, action, nextState, reward) i += 1 else: q.put(data) nextState = agent.resetAct(obs, obj_poses) obs, reward, terminate = task.step(nextState) # if terminate: # break env.shutdown()
class RLBenchEnv(): """ make RLBench env to have same interfaces as openai.gym """ def __init__(self, task_name, state_type='left_shoulder_rgb'): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=False) self.env.launch() try: self.task = self.env.get_task( getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() state = getattr(obs, state_type) self.state_type = state_type self.spec = Spec(task_name) self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, ), dtype=np.float32) self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=state.shape) def seed(self, seed_value): # set seed as in openai.gym env pass def render(self): # render the scene pass def reset(self): descriptions, obs = self.task.reset() return getattr(obs, self.state_type) def step(self, action): obs_, reward, terminate = self.task.step(action) return getattr(obs_, self.state_type), reward, terminate, None def close(self): self.env.shutdown()
def job_worker_validation(worker_id, action_mode, obs_config, task_class, command_q: mp.Queue, result_q: mp.Queue, obs_scaling, redundancy_resolution_setup, seed): np.random.seed(worker_id + seed) # setup the environment env = Environment(action_mode=action_mode, obs_config=obs_config, headless=True) env.launch() task = env.get_task(task_class) task.reset() while True: command = command_q.get() command_type = command[0] command_args = command[1] if command_type == "reset": task.set_variation(task.sample_variation()) descriptions, observation = task.reset() if obs_scaling is not None: observation = observation.get_low_dim_data() / obs_scaling else: observation = observation.get_low_dim_data() result_q.put((descriptions, observation)) elif command_type == "step": actions = command_args[0] # check if we need to resolve redundancy if redundancy_resolution_setup["use_redundancy_resolution"]: actions[0:7], L = task.resolve_redundancy_joint_velocities( actions=actions[0:7], setup=redundancy_resolution_setup) else: # here we only care about the loss (for logging) _, L = task.resolve_redundancy_joint_velocities( actions=actions[0:7], setup=redundancy_resolution_setup) next_observation, reward, done = task.step(actions) if obs_scaling is not None: next_observation = next_observation.get_low_dim_data( ) / obs_scaling else: next_observation = next_observation.get_low_dim_data() result_q.put((next_observation, reward, done, L)) elif command_type == "kill": print("Killing worker %d" % worker_id) env.shutdown() break
class TestEnvironment(unittest.TestCase): def tearDown(self): self.env.shutdown() def get_task(self, task_class, arm_action_mode): obs_config = ObservationConfig() obs_config.set_all(False) obs_config.set_all_low_dim(True) obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(arm_action_mode) self.env = Environment( action_mode, ASSET_DIR, obs_config, headless=True) self.env.launch() return self.env.get_task(task_class) def test_get_task(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) self.assertIsInstance(task, TaskEnvironment) self.assertEqual(task.get_name(), 'reach_target') def test_reset(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) desc, obs = task.reset() self.assertIsNotNone(obs.right_shoulder_rgb) self.assertIsNone(obs.left_shoulder_rgb) self.assertIsInstance(desc, list) def test_step(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) task.reset() obs, reward, term = task.step(np.random.uniform(size=8)) self.assertIsNotNone(obs.right_shoulder_rgb) self.assertIsNone(obs.left_shoulder_rgb) self.assertEqual(reward, 0) self.assertFalse(term) def test_get_invalid_number_of_demos(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) with self.assertRaises(RuntimeError): task.get_demos(10, live_demos=False, image_paths=True) def test_get_stored_demos_paths(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) demos = task.get_demos(2, live_demos=False, image_paths=True) self.assertEqual(len(demos), 2) self.assertGreater(len(demos[0]), 0) self.assertIsInstance(demos[0][0].right_shoulder_rgb, str) self.assertIsNone(demos[0][0].left_shoulder_rgb) def test_get_stored_demos_images(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) demos = task.get_demos(2, live_demos=False, image_paths=False) self.assertEqual(len(demos), 2) self.assertGreater(len(demos[0]), 0) self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray) self.assertIsNone(demos[0][0].left_shoulder_rgb) def test_get_live_demos(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) demos = task.get_demos(2, live_demos=True) self.assertEqual(len(demos), 2) self.assertGreater(len(demos[0]), 0) self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray) def test_action_mode_abs_joint_velocity(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_VELOCITY) task.reset() action = [0.1] * 8 obs, reward, term = task.step(action) [self.assertAlmostEqual(0.1, a, delta=0.05) for a in obs.joint_velocities] def test_action_mode_delta_joint_velocity(self): task = self.get_task( ReachTarget, ArmActionMode.DELTA_JOINT_VELOCITY) task.reset() action = [-0.1] * 8 [task.step(action) for _ in range(2)] obs, reward, term = task.step(action) [self.assertAlmostEqual(-0.3, a, delta=0.1) for a in obs.joint_velocities] def test_action_mode_abs_joint_position(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_POSITION) _, obs = task.reset() init_angles = np.append(obs.joint_positions, 0.) # for gripper target_angles = np.array(init_angles) + 0.05 [task.step(target_angles) for _ in range(5)] obs, reward, term = task.step(target_angles) [self.assertAlmostEqual(a, actual, delta=0.05) for a, actual in zip(target_angles, obs.joint_positions)] def test_action_mode_delta_joint_position(self): task = self.get_task( ReachTarget, ArmActionMode.DELTA_JOINT_POSITION) _, obs = task.reset() init_angles = obs.joint_positions target_angles = np.array(init_angles) + 0.06 [task.step([0.01] * 8) for _ in range(5)] obs, reward, term = task.step([0.01] * 8) [self.assertAlmostEqual(a, actual, delta=0.05) for a, actual in zip(target_angles, obs.joint_positions)] def test_action_mode_abs_ee_position(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_EE_POSE) _, obs = task.reset() init_pose = obs.gripper_pose new_pose = np.append(init_pose, 0.0) # for gripper new_pose[2] -= 0.1 # 10cm down obs, reward, term = task.step(new_pose) [self.assertAlmostEqual(a, p, delta=0.001) for a, p in zip(new_pose, obs.gripper_pose)] def test_action_mode_delta_ee_position(self): task = self.get_task( ReachTarget, ArmActionMode.DELTA_EE_POSE) _, obs = task.reset() init_pose = obs.gripper_pose new_pose = [0, 0, -0.1, 0, 0, 0, 1.0, 0.0] # 10cm down expected_pose = list(init_pose) expected_pose[2] -= 0.1 obs, reward, term = task.step(new_pose) [self.assertAlmostEqual(a, p, delta=0.001) for a, p in zip(expected_pose, obs.gripper_pose)] def test_action_mode_abs_ee_velocity(self): VEL = -0.2 # move down with velocity 0.1 task = self.get_task( ReachTarget, ArmActionMode.ABS_EE_VELOCITY) _, obs = task.reset() expected_pose = obs.gripper_pose expected_pose[2] += (VEL * 0.05) vels = np.zeros((8,)) vels[2] += VEL obs, reward, term = task.step(vels) [self.assertAlmostEqual(a, p, delta=0.001) for a, p in zip(expected_pose, obs.gripper_pose)] def test_action_mode_delta_velocity(self): VEL = -0.2 # move down with velocity 0.2 task = self.get_task( ReachTarget, ArmActionMode.DELTA_EE_VELOCITY) _, obs = task.reset() expected_pose = obs.gripper_pose expected_pose[2] += (VEL * 0.05) expected_pose[2] += ((VEL * 2) * 0.05) expected_pose[2] += ((VEL * 3) * 0.05) vels = np.zeros((8,)) vels[2] += VEL [task.step(vels) for _ in range(2)] obs, reward, term = task.step(vels) [self.assertAlmostEqual(a, p, delta=0.01) for a, p in zip(expected_pose, obs.gripper_pose)] def test_action_mode_abs_joint_torque(self): task = self.get_task( ReachTarget, ArmActionMode.ABS_JOINT_TORQUE) task.reset() action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0.0] obs, reward, term = task.step(action) # Difficult to test given gravity, so just check for exceptions. def test_action_mode_delta_joint_torque(self): task = self.get_task( ReachTarget, ArmActionMode.DELTA_JOINT_TORQUE) _, obs = task.reset() init_forces = np.array(obs.joint_forces) action = [0.1, -0.1, 0.1, -0.1, 0.1, -0.1, 0.1, 0] expected = np.array([0.3, -0.3, 0.3, -0.3, 0.3, -0.3, 0.3]) expected += init_forces [task.step(action) for _ in range(2)] obs, reward, term = task.step(action)
self.action_size = action_size def act(self, obs): return (np.random.normal(0.0, 0.1, size=(self.action_size, ))).tolist() obs_config = ObservationConfig() obs_config.set_all(False) obs_config.left_shoulder_camera.rgb = True obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode, obs_config=obs_config, headless=False) env.launch() task = env.get_task(PickAndLift) agent = Agent(action_mode.action_size) training_steps = 120 episode_length = 40 obs = None for i in range(training_steps): if i % episode_length == 0: print('Reset Episode') descriptions, obs = task.reset() print(descriptions) action = agent.act(obs) print(action) obs, reward, terminate = task.step(action)
class RLBenchEnv(gym.Env): """An gym wrapper for RLBench.""" metadata = {'render.modes': ['human', 'rgb_array']} def __init__(self, task_class, observation_mode='state', render_mode: Union[None, str] = None): self._observation_mode = observation_mode self._render_mode = render_mode obs_config = ObservationConfig() if observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) elif observation_mode == 'vision': obs_config.set_all(True) else: raise ValueError('Unrecognised observation_mode: %s.' % observation_mode) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=True) self.env.launch() self.task = self.env.get_task(task_class) _, obs = self.task.reset() self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size, )) if observation_mode == 'state': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) elif observation_mode == 'vision': self.observation_space = spaces.Dict({ "state": spaces.Box(low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "left_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape), "right_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape), "wrist_rgb": spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape), "front_rgb": spaces.Box(low=0, high=1, shape=obs.front_rgb.shape), }) if render_mode is not None: # Add the camera to the scene cam_placeholder = Dummy('cam_cinematic_placeholder') self._gym_cam = VisionSensor.create([640, 360]) self._gym_cam.set_pose(cam_placeholder.get_pose()) if render_mode == 'human': self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) else: self._gym_cam.set_render_mode(RenderMode.OPENGL3) def _extract_obs(self, obs) -> Dict[str, np.ndarray]: if self._observation_mode == 'state': return obs.get_low_dim_data() elif self._observation_mode == 'vision': return { "state": obs.get_low_dim_data(), "left_shoulder_rgb": obs.left_shoulder_rgb, "right_shoulder_rgb": obs.right_shoulder_rgb, "wrist_rgb": obs.wrist_rgb, "front_rgb": obs.front_rgb, } def render(self, mode='human') -> Union[None, np.ndarray]: if mode != self._render_mode: raise ValueError( 'The render mode must match the render mode selected in the ' 'constructor. \nI.e. if you want "human" render mode, then ' 'create the env by calling: ' 'gym.make("reach_target-state-v0", render_mode="human").\n' 'You passed in mode %s, but expected %s.' % (mode, self._render_mode)) if mode == 'rgb_array': return self._gym_cam.capture_rgb() def reset(self) -> Dict[str, np.ndarray]: descriptions, obs = self.task.reset() del descriptions # Not used. return self._extract_obs(obs) def step(self, action) -> Tuple[Dict[str, np.ndarray], float, bool, dict]: obs, reward, terminate = self.task.step(action) return self._extract_obs(obs), reward, terminate, {} def close(self) -> None: self.env.shutdown()
class SimulationEnvironment(): """ This can be a parent class from which we can have multiple child classes that can diversify for different tasks and deeper functions within the tasks. """ def __init__(self, task_name, state_type_list=['left_shoulder_rgb'], dataset_root='', observation_mode='state', headless=True): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self._observation_mode = observation_mode self.env = Environment( action_mode, dataset_root, obs_config=obs_config, headless=headless) # Dont need to call launch as task.get_task can launch env. self.env.launch() self.task = self.env.get_task(task_name) _, obs = self.task.reset() self.action_space = spaces.Box( low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32) # self.logger = logger.create_logger(__class__.__name__) # self.logger.propagate = 0 if len(state_type_list) > 0: self.observation_space = [] # for state_type in state_type_list: # state = getattr(obs, state_type) self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) else: raise ValueError('No State Type!') self.state_type_list = state_type_list def _get_state(self, obs): if len(self.state_type_list) > 0: if self._observation_mode == 'state': return self.get_low_dim_data(obs) elif self._observation_mode == 'vision': return None def get_low_dim_data(self, obs) -> np.ndarray: """Gets a 1D array of all the low-dimensional obseervations. :return: 1D array of observations. """ low_dim_data = [] if obs.gripper_open is None else [[obs.gripper_open]] for data in [obs.joint_velocities, obs.joint_positions, obs.joint_forces, obs.gripper_pose, obs.gripper_joint_positions, obs.gripper_touch_forces, obs.task_low_dim_state]: if data is not None: low_dim_data.append(data) return np.concatenate(low_dim_data) def reset(self): descriptions, obs = self.task.reset() return self._get_state(obs) def step(self, action): # reward in original rlbench is binary for success or not obs_, reward, terminate = self.task.step(action) return self._get_state(obs_), reward, terminate, None def shutdown(self): # self.logger.info("Environment Shutdown! Create New Instance If u want to start again") # self.logger.handlers.pop() self.env.shutdown()
class RLBenchBox(TaskInterface): def __init__(self, task_class, state_dim, n_features, headless=True): super().__init__(n_features) self._group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"]) self._space = ClassicSpace(self._group, n_features) obs_config = ObservationConfig() obs_config.set_all_low_dim(True) obs_config.set_all_high_dim(False) self._obs_config = obs_config self._state_dim = state_dim self._headless = headless action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION) self._task_class = task_class self._action_mode = action_mode self.env = Environment( action_mode, "", obs_config, headless=headless) self.env.launch() self.task = self.env.get_task(task_class) self._obs = None def get_context_dim(self): return self._state_dim def read_context(self): return self._obs[1].task_low_dim_state def get_demonstrations(self): file = "parameters/%s_%d.npy" % (self.task.get_name(), self._space.n_features) try: return np.load(file) except: raise Exception("File %s not found. Please consider running 'dataset_generator.py'" % file) def _stop(self, joint_gripper, previous_reward): if previous_reward == 0.: success = 0. for _ in range(50): obs, reward, terminate = self.task.step(joint_gripper) if reward == 1.0: success = 1. break return self.task._task.get_dense_reward(), success return self.task._task.get_dense_reward(), 1. def send_movement(self, weights, duration): mp = MovementPrimitive(self._space, MovementPrimitive.get_params_from_block(self._space, weights)) duration = 1 if duration < 0 else duration if self._headless: trajectory = mp.get_full_trajectory(duration=min(duration, 1), frequency=200) else: trajectory = mp.get_full_trajectory(duration=5 * duration, frequency=200) tot_reward = 0. success = 0 for a1 in trajectory.values: # , a2 in zip(trajectory.values[:-1, :], trajectory.values[1:, :]): joint = a1 # (a2-a1)*20. joint_gripper = joint obs, reward, terminate = self.task.step(joint_gripper) if reward == 1. or terminate == 1.: if reward == 1.: success = 1. break tot_reward, success = self._stop(joint_gripper, success) return success, tot_reward def reset(self): self._obs = self.task.reset()
from src.container_reset_agent import ContainerResetAgent from src.rl_fwd_agent import RlFwdAgent # Setup Environment: obs_config = ObservationConfig() obs_config.set_all(True) # obs_config.left_shoulder_camera.rgb = True # obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) static_positions = True # This will ensure non-random initialization env = Environment(action_mode, '', obs_config, static_positions=static_positions) task = env.get_task(EmptyContainer) task.reset() # Execute FWD policy fwd_agent = RlFwdAgent(env, task) fwd_agent.empty_the_container() # Reset the environment # Reset Agent 1 # reset_agent = RetryResetAgent(env, task) # reset_agent.reset_env() # Reset Agent 2 container_reset_agent = ContainerResetAgent(env, task) container_reset_agent.reset_env()
def run(i, lock, task_index, variation_count, results, file_lock, tasks): """Each thread will choose one task and variation, and then gather all the episodes_per_task for that variation.""" # Initialise each thread with random seed np.random.seed(None) num_tasks = len(tasks) img_size = list(map(int, FLAGS.image_size)) obs_config = ObservationConfig() obs_config.set_all(True) obs_config.right_shoulder_camera.image_size = img_size obs_config.left_shoulder_camera.image_size = img_size obs_config.wrist_camera.image_size = img_size obs_config.front_camera.image_size = img_size # We want to save the masks as rgb encodings. obs_config.left_shoulder_camera.masks_as_one_channel = False obs_config.right_shoulder_camera.masks_as_one_channel = False obs_config.wrist_camera.masks_as_one_channel = False obs_config.front_camera.masks_as_one_channel = False if FLAGS.renderer == 'opengl': obs_config.right_shoulder_camera.render_mode = RenderMode.OPENGL obs_config.left_shoulder_camera.render_mode = RenderMode.OPENGL obs_config.wrist_camera.render_mode = RenderMode.OPENGL obs_config.front_camera.render_mode = RenderMode.OPENGL rlbench_env = Environment(action_mode=ActionMode(), obs_config=obs_config, headless=True) rlbench_env.launch() task_env = None tasks_with_problems = results[i] = '' while True: # Figure out what task/variation this thread is going to do with lock: if task_index.value >= num_tasks: print('Process', i, 'finished') break my_variation_count = variation_count.value t = tasks[task_index.value] task_env = rlbench_env.get_task(t) var_target = task_env.variation_count() if FLAGS.variations >= 0: var_target = np.minimum(FLAGS.variations, var_target) if my_variation_count >= var_target: # If we have reached the required number of variations for this # task, then move on to the next task. variation_count.value = my_variation_count = 0 task_index.value += 1 variation_count.value += 1 if task_index.value >= num_tasks: print('Process', i, 'finished') break t = tasks[task_index.value] print('Process', i, 'collecting task:', task_env.get_name(), '// variation:', my_variation_count) task_env = rlbench_env.get_task(t) task_env.set_variation(my_variation_count) obs, descriptions = task_env.reset() variation_path = os.path.join(FLAGS.save_path, task_env.get_name(), VARIATIONS_FOLDER % my_variation_count) check_and_make(variation_path) with open(os.path.join(variation_path, VARIATION_DESCRIPTIONS), 'wb') as f: pickle.dump(descriptions, f) episodes_path = os.path.join(variation_path, EPISODES_FOLDER) check_and_make(episodes_path) abort_variation = False for ex_idx in range(FLAGS.episodes_per_task): attempts = 10 while attempts > 0: try: # TODO: for now we do the explicit looping. demo, = task_env.get_demos(amount=1, live_demos=True) except Exception as e: attempts -= 1 if attempts > 0: continue problem = ( 'Process %d failed collecting task %s (variation: %d, ' 'example: %d). Skipping this task/variation.\n%s\n' % (i, task_env.get_name(), my_variation_count, ex_idx, str(e))) print(problem) tasks_with_problems += problem abort_variation = True break episode_path = os.path.join(episodes_path, EPISODE_FOLDER % ex_idx) with file_lock: save_demo(demo, episode_path) break if abort_variation: break results[i] = tasks_with_problems rlbench_env.shutdown()
obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode, obs_config=obs_config, headless=False) env.launch() agent = Agent(action_mode.action_size) train_tasks = MT30_V1['train'] training_cycles_per_task = 3 training_steps_per_task = 80 episode_length = 40 for _ in range(training_cycles_per_task): task_to_train = np.random.choice(train_tasks, 1)[0] task = env.get_task(task_to_train) task.sample_variation() # random variation for i in range(training_steps_per_task): if i % episode_length == 0: print('Reset Episode') descriptions, obs = task.reset() print(descriptions) action = agent.act(obs) obs, reward, terminate = task.step(action) print('Done') env.shutdown()
class RLBenchEnv(gym.GoalEnv): """An gym wrapper for RLBench.""" metadata = {'render.modes': ['human']} def __init__(self, task_class, observation_mode='state', randomization_mode="none", rand_config=None, img_size=256, special_start=[], fixed_grip=-1, force_randomly_place=False, force_change_position=False, sparse=False, not_special_p = 0, ground_p = 0, special_is_grip=False, altview=False, procedural_ind=0, procedural_mode='same', procedural_set = [], is_mesh_obs=False, blank=False, COLOR_TUPLE=[1,0,0]): # blank is 0s agent. self.blank = blank #altview is whether to have second camera angle or not. True/False, "both" to concatentae the observations. self.altview=altview self.img_size=img_size self.sparse = sparse self.task_class = task_class self._observation_mode = observation_mode self._randomization_mode = randomization_mode #special start is a list of actions to take at the beginning. self.special_start = special_start #fixed_grip temp hack for keeping the gripper a certain way. Change later. 0 for fixed closed, 0.1 for fixed open, -1 for not fixed self.fixed_grip = fixed_grip #to force the task to be randomly placed self.force_randomly_place = force_randomly_place #force the task to change position in addition to rotation self.force_change_position = force_change_position obs_config = ObservationConfig() if observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) elif observation_mode == 'vision' or observation_mode=="visiondepth" or observation_mode=="visiondepthmask": # obs_config.set_all(True) obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) else: raise ValueError( 'Unrecognised observation_mode: %s.' % observation_mode) action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_NOQ) print("using delta pose pan") if randomization_mode == "random": objs = ['target', 'boundary', 'Floor', 'Roof', 'Wall1', 'Wall2', 'Wall3', 'Wall4', 'diningTable_visible'] if rand_config is None: assert False self.env = DomainRandomizationEnvironment( action_mode, obs_config=obs_config, headless=True, randomize_every=RandomizeEvery.EPISODE, frequency=1, visual_randomization_config=rand_config ) else: self.env = Environment( action_mode, obs_config=obs_config, headless=True ) self.env.launch() self.task = self.env.get_task(task_class) # Probability. Currently used for probability that pick and lift task will start off gripper at a certain location (should probs be called non_special p) self.task._task.not_special_p = not_special_p # Probability that ground goal. self.task._task.ground_p = ground_p # For the "special" case, whether to grip the object or just hover above it. self.task._task.special_is_grip = special_is_grip # for procedural env self.task._task.procedural_ind = procedural_ind # procedural mode: same, increase, or random. self.task._task.procedural_mode = procedural_mode # ideally a list-like object, dictates the indices to sample from each episode. self.task._task.procedural_set = procedural_set # if state obs is mesh obs self.task._task.is_mesh_obs = is_mesh_obs self.task._task.sparse = sparse self.task._task.COLOR_TUPLE = COLOR_TUPLE _, obs = self.task.reset() cam_placeholder = Dummy('cam_cinematic_placeholder') cam_pose = cam_placeholder.get_pose().copy() #custom pose cam_pose = [ 1.59999931, 0. , 2.27999949 , 0.65328157, -0.65328145, -0.27059814, 0.27059814] cam_pose[0] = 1 cam_pose[2] = 1.5 self.frontcam = VisionSensor.create([img_size, img_size]) self.frontcam.set_pose(cam_pose) self.frontcam.set_render_mode(RenderMode.OPENGL) self.frontcam.set_perspective_angle(60) self.frontcam.set_explicit_handling(1) self.frontcam_mask = VisionSensor.create([img_size, img_size]) self.frontcam_mask.set_pose(cam_pose) self.frontcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED) self.frontcam_mask.set_perspective_angle(60) self.frontcam_mask.set_explicit_handling(1) if altview: alt_pose = [0.25 , -0.65 , 1.5, 0, 0.93879825 ,0.34169483 , 0] self.altcam = VisionSensor.create([img_size, img_size]) self.altcam.set_pose(alt_pose) self.altcam.set_render_mode(RenderMode.OPENGL) self.altcam.set_perspective_angle(60) self.altcam.set_explicit_handling(1) self.altcam_mask = VisionSensor.create([img_size, img_size]) self.altcam_mask.set_pose(alt_pose) self.altcam_mask.set_render_mode(RenderMode.OPENGL_COLOR_CODED) self.altcam_mask.set_perspective_angle(60) self.altcam_mask.set_explicit_handling(1) self.action_space = spaces.Box( low=-1.0, high=1.0, shape=(action_mode.action_size,)) if observation_mode == 'state': self.observation_space = spaces.Dict({ "observation": spaces.Box( low=-np.inf, high=np.inf, shape=self.task._task.get_state_obs().shape), "achieved_goal": spaces.Box( low=-np.inf, high=np.inf, shape=self.task._task.get_achieved_goal().shape), 'desired_goal': spaces.Box( low=-np.inf, high=np.inf, shape=self.task._task.get_desired_goal().shape) }) # Use the frontvision cam elif observation_mode == 'vision': self.frontcam.handle_explicitly() self.observation_space = spaces.Dict({ "state": spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "observation": spaces.Box( low=0, high=1, shape=self.frontcam.capture_rgb().transpose(2,0,1).flatten().shape, ) }) if altview == "both": example = self.frontcam.capture_rgb().transpose(2,0,1).flatten() self.observation_space = spaces.Dict({ "state": spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "observation": spaces.Box( low=0, high=1, shape=np.array([example,example]).shape, ) }) elif observation_mode == 'visiondepth': self.frontcam.handle_explicitly() self.observation_space = spaces.Dict({ "state": spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "observation": spaces.Box( #thinking about not flattening the shape, because primarily used for dataset and not for training low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]]).shape, ) }) elif observation_mode == 'visiondepthmask': self.frontcam.handle_explicitly() self.frontcam_mask.handle_explicitly() self.observation_space = spaces.Dict({ "state": spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "observation": spaces.Box( #thinking about not flattening the shape, because primarily used for dataset and not for training low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape, ) }) if altview == "both": self.observation_space = spaces.Dict({ "state": spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "observation": spaces.Box( #thinking about not flattening the shape, because primarily used for dataset and not for training low=0, high=1, shape=np.array([self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], rgb_handles_to_mask(self.frontcam_mask.capture_rgb())]).shape, ) }) self._gym_cam = None # GoalEnv def compute_reward(self, achieved_goal, desired_goal, info): reward = self.task._task.compute_reward(achieved_goal, desired_goal, info) return reward def _extract_obs(self, obs): if self._observation_mode == 'state': return { 'observation': self.task._task.get_state_obs(), 'achieved_goal': self.task._task.get_achieved_goal(), 'desired_goal':self.task._task.get_desired_goal(), } elif self._observation_mode == 'vision': self.frontcam.handle_explicitly() if self.altview: self.altcam.handle_explicitly() second_view = self.altcam.capture_rgb().transpose(2,0,1).flatten() if self.altview == "both": return { 'achieved_goal': self.task._task.get_achieved_goal(), 'desired_goal':self.task._task.get_desired_goal(), 'save_state': self.task._task.get_save_state(), "observation": np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), second_view]) } return { 'achieved_goal': self.task._task.get_achieved_goal(), 'desired_goal':self.task._task.get_desired_goal(), 'save_state': self.task._task.get_save_state(), "observation": self.altcam.capture_rgb().transpose(2,0,1).flatten() } if self.blank: return { 'achieved_goal': self.task._task.get_achieved_goal(), 'desired_goal':self.task._task.get_desired_goal(), 'save_state': self.task._task.get_save_state(), "observation": np.zeros(self.observation_space['observation'].shape) } return { 'achieved_goal': self.task._task.get_achieved_goal(), 'desired_goal':self.task._task.get_desired_goal(), 'save_state': self.task._task.get_save_state(), "observation": self.frontcam.capture_rgb().transpose(2,0,1).flatten(), } elif self._observation_mode == 'visiondepth': self.frontcam.handle_explicitly() return { "state": obs.get_low_dim_data(), "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...]] } elif self._observation_mode == 'visiondepthmask': self.frontcam.handle_explicitly() self.frontcam_mask.handle_explicitly() mask = np.array(self.frontcam_mask.capture_rgb()) if self.altview: self.altcam.handle_explicitly() self.altcam_mask.handle_explicitly() altmask = np.array(self.altcam_mask.capture_rgb()) second_view = self.altcam.capture_rgb().transpose(2,0,1) if self.altview == "both": assert False # no use case for this return { "state": obs.get_low_dim_data(), "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.altcam.capture_rgb().transpose(2,0,1).flatten()] } return { "state": obs.get_low_dim_data(), "observation": [self.altcam.capture_rgb().transpose(2,0,1), self.altcam.capture_depth()[None,...], altmask] } return { "state": obs.get_low_dim_data(), "observation": [self.frontcam.capture_rgb().transpose(2,0,1), self.frontcam.capture_depth()[None,...], mask] } def render(self, mode='human'): self.frontcam.handle_explicitly() return self.frontcam.capture_rgb() def reset(self): _, obs = self.task.reset(force_randomly_place=self.force_randomly_place, force_change_position=self.force_change_position) import time if len(self.special_start) != 0: for a in self.special_start: self.step(a) # Doesn't actually use obs. return self._extract_obs(obs) def step(self, action): if self.fixed_grip >= 0: action[3] = self.fixed_grip obs, reward, terminate = self.task.step(action) reward, done = self.task._task.get_reward_and_done(sparse=self.sparse) return self._extract_obs(obs), reward, done, {"is_success": int(done)} def close(self): self.env.shutdown() def rerender(self, state): self.task._task.restore_full_state(state) return self.render().transpose(2,0,1).flatten() def resample_step(self, state, action, sampled_goal): self.task._task.restore_save_state(state) Shape('target').set_position(sampled_goal) if self.altview == "both": self.frontcam.handle_explicitly() self.altcam.handle_explicitly() o_before = np.array([self.frontcam.capture_rgb().transpose(2,0,1).flatten(), self.altcam.capture_rgb().transpose(2,0,1).flatten()]) elif self.altview: self.altcam.handle_explicitly() o_before = self.altcam.capture_rgb().transpose(2,0,1).flatten() else: if self.blank: o_before = np.zeros(self.observation_space['observation'].shape) else: o_before = self.render().transpose(2,0,1).flatten() o, r, d, _ = self.step(action) o_after = o['observation'] return o_before, r, d, o_after
perturbed_quat_wxyz.z, perturbed_quat_wxyz.w ] obj_poses[name] = pose return obj_poses if __name__ == "__main__": # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE) # See rlbench/action_modes.py for other action modes # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN) action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) env = Environment(action_mode, '', ObservationConfig(), False, frequency=5) # task = env.get_task(StackBlocks) # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable task = env.get_task( PlayJenga ) # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable obj_pose_sensor = NoisyObjectPoseSensor(env) descriptions, obs = task.reset() agent = Agent(obs, obj_pose_sensor.get_poses()) while True: # Getting noisy object poses obj_poses = obj_pose_sensor.get_poses() # Getting various fields from obs current_joints = obs.joint_positions gripper_pose = obs.gripper_pose rgb = obs.wrist_rgb depth = obs.wrist_depth
class FakeRLBenchEnv(Environment): ROBOT_NAME = SUPPORTED_ROBOTS.keys() OBSERVATION_MODE = ("state", "vision", "all") ACTION_MODE = { "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY, "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY, "joint position": ArmActionMode.ABS_JOINT_POSITION, "delta joint position": ArmActionMode.DELTA_JOINT_POSITION, "joint torque": ArmActionMode.ABS_JOINT_TORQUE, "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE, "effector velocity": ArmActionMode.ABS_EE_VELOCITY, "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY, "effector position": ArmActionMode.ABS_EE_POSE, "delta effector position": ArmActionMode.DELTA_EE_POSE } def __init__(self, task_name: str, observation_mode: str = "state", action_mode: str = "delta joint position", robot_name: str = "panda"): super(FakeRLBenchEnv, self).__init__(task_name) if task_name not in all_class_names(tasks): raise KeyError(f"Error: unknown task name {task_name}") if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE: raise KeyError( f"Error: unknown observation mode {observation_mode}, available: {FakeRLBenchEnv.OBSERVATION_MODE}" ) if action_mode not in FakeRLBenchEnv.ACTION_MODE: raise KeyError( f"Error: unknown action mode {action_mode}, available: {FakeRLBenchEnv.ACTION_MODE.keys()}" ) if robot_name not in FakeRLBenchEnv.ROBOT_NAME: raise KeyError( f"Error: unknown robot name {robot_name}, available: {FakeRLBenchEnv.ROBOT_NAME}" ) # TODO: modify the task/robot/arm/gripper to support early instantiation before v-rep launched self._observation_mode = observation_mode self._action_mode = action_mode self._task_name = task_name self._robot_name = robot_name self._observation_config = ObservationConfig() if self._observation_mode == "state": self._observation_config.set_all_low_dim(True) self._observation_config.set_all_high_dim(False) elif self._observation_mode == "vision": self._observation_config.set_all_low_dim(False) self._observation_config.set_all_high_dim(True) elif self._observation_mode == "all": self._observation_config.set_all(True) self._action_config = ActionMode( FakeRLBenchEnv.ACTION_MODE[self._action_mode]) self.env = None self.task = None self._update_info_dict() def init(self, display=False): with suppress_stdout(): self.env = RLEnvironment(action_mode=self._action_config, obs_config=self._observation_config, headless=not display, robot_configuration=self._robot_name) self.env.launch() self.task = self.env.get_task( get_named_class(self._task_name, tasks)) def reset(self, random: bool = True) -> StepDict: if not random: np.random.seed(0) self.task._static_positions = not random descriptions, obs = self.task.reset() # Returns a list of descriptions and the first observation next_step = {"opt": descriptions} if self._observation_mode == "state" or self._observation_mode == "all": next_step['s'] = obs.get_low_dim_data() if self._observation_mode == "vision" or self._observation_mode == "all": next_step["left shoulder rgb"] = obs.left_shoulder_rgb next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb next_step["wrist_rgb"] = obs.wrist_rgb return next_step def step(self, last_step: StepDict) -> (StepDict, bool): assert 'a' in last_step, "Key 'a' for action not in last_step, maybe you passed a wrong dict ?" obs, reward, terminate = self.task.step(last_step['a']) last_step['r'] = reward last_step["info"] = {} next_step = {"opt": None} if self._observation_mode == "state" or self._observation_mode == "all": next_step['s'] = obs.get_low_dim_data() if self._observation_mode == "vision" or self._observation_mode == "all": next_step["left shoulder rgb"] = obs.left_shoulder_rgb next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb next_step["wrist_rgb"] = obs.wrist_rgb return last_step, next_step, terminate def finalize(self) -> bool: with suppress_stdout(): self.env.shutdown() self.task = None self.env = None return True def name(self) -> str: return self._task_name # ------------- private methods ------------- # def _update_info_dict(self): # update info dict self._info["action mode"] = self._action_mode self._info["observation mode"] = self._observation_mode # TODO: action dim should related to robot, not action mode, here we fixed it temporally self._info["action dim"] = (self._action_config.action_size, ) self._info["action low"] = -np.ones(self._info["action dim"], dtype=np.float32) self._info["action high"] = np.ones(self._info["action dim"], dtype=np.float32) if self._observation_mode == "state" or self._observation_mode == "all": # TODO: observation should be determined without init the entire environment with suppress_stdout(): env = RLEnvironment(action_mode=self._action_config, obs_config=self._observation_config, headless=True, robot_configuration=self._robot_name) env.launch() task = env.get_task(get_named_class(self._task_name, tasks)) _, obs = task.reset() env.shutdown() del task del env self._info["time step"] = _DT self._info["state dim"] = tuple(obs.get_low_dim_data().shape) self._info["state low"] = np.ones(self._info["state dim"], dtype=np.float32) * -np.inf self._info["state high"] = np.ones(self._info["state dim"], dtype=np.float32) * np.inf if self._observation_mode == "vision" or self._observation_mode == "all": self._info["left shoulder rgb dim"] = tuple( self._observation_config.left_shoulder_camera.image_size) + ( 3, ) self._info["left shoulder rgb low"] = np.zeros( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["left shoulder rgb high"] = np.ones( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb dim"] = tuple( self._observation_config.right_shoulder_camera.image_size) + ( 3, ) self._info["right shoulder rgb low"] = np.zeros( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb high"] = np.ones( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["wrist rgb dim"] = tuple( self._observation_config.wrist_camera.image_size) + (3, ) self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"], dtype=np.float32) self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"], dtype=np.float32) self._info["reward low"] = -np.inf self._info["reward high"] = np.inf def live_demo(self, amount: int, random: bool = True) -> SampleBatch: """ :param amount: number of demonstration trajectories to be generated :param random: if the starting position is random :return: observation list : [amount x [(steps-1) x [s, a] + [s_term, None]]], WARNING: that the action here is calculated from observation, when executing, they may cause some inaccuracy """ seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)] self.task._static_positions = not random demo_pack = [] for seed in seeds: np.random.seed(seed) pack = self.task.get_demos(1, True)[0] demo_traj = [] np.random.seed(seed) desc, obs = self.task.reset() v_tar = 0. for o_tar in pack[1:]: action = [] if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY: action.extend( (o_tar.joint_positions - obs.joint_positions) / _DT) elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION: action.extend(o_tar.joint_positions) elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE: action.extend(o_tar.joint_forces) raise TypeError( "Warning, abs_joint_torque is not currently supported") elif self._action_config.arm == ArmActionMode.ABS_EE_POSE: action.extend(o_tar.gripper_pose) elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY: # WARNING: This calculating method is not so accurate since rotation cannot be directed 'add' together # since the original RLBench decides to do so, we should follow it action.extend( (o_tar.gripper_pose - obs.gripper_pose) / _DT) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY: v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT action.extend(v_tar - obs.joint_velocities) raise TypeError( "Warning, delta_joint_velocity is not currently supported" ) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION: action.extend(o_tar.joint_positions - obs.joint_positions) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE: action.extend(o_tar.joint_forces - obs.joint_forces) raise TypeError( "Warning, delta_joint_torque is not currently supported" ) elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE: action.extend(o_tar.gripper_pose[:3] - obs.gripper_pose[:3]) q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion( obs.gripper_pose[3:7]).conjugate action.extend(list(q)) elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY: # WARNING: This calculating method is not so accurate since rotation cannot be directed 'add' together # since the original RLBench decides to do so, we should follow it v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT action.extend(v_tar_new - v_tar) v_tar = v_tar_new raise TypeError( "Warning, delta_ee_velocity is not currently supported" ) action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0) action = np.asarray(action, dtype=np.float32) demo_traj.append({ 'observation': obs, 'a': action, 's': obs.get_low_dim_data() }) obs, reward, done = self.task.step(action) demo_traj[-1]['r'] = reward demo_pack.append(demo_traj) return { "trajectory": demo_pack, "config": "default", "policy": "hand-coding", "env class": self.__class__.__name__, "env name": self._task_name, "env config": "default", "observation config": self._observation_mode, "robot config": self._robot_name, "action config": self._action_mode }
class RLBench(BaseSimulator): def __init__(self, h): #64x64 camera outputs cam = CameraConfig(image_size=(64, 64)) obs_config = ObservationConfig(left_shoulder_camera=cam, right_shoulder_camera=cam, wrist_camera=cam, front_camera=cam) obs_config.set_all(True) # delta EE control with motion planning action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME) #Inits self.env = Environment(action_mode, obs_config=obs_config, headless=h) self.env.launch() self.task = self.env.get_task(ReachTarget) def reset(self): d, o = self.task.reset() return o def step(self, a, prev_state): # delta orientation d_quat = np.array([0, 0, 0, 1]) # gripper state gripper_open = 1.0 # delta position d_pos = np.zeros(3) # For positive magnitude if (a % 2 == 0): a = int(a / 2) d_pos[a] = 0.03 # For negative magnitude else: a = int((a - 1) / 2) d_pos[a] = -0.03 # Forming action as expected by the environment action = np.concatenate([d_pos, d_quat, [gripper_open]]) try: s, r, t = self.task.step(action) r *= 1000 # Handling failure in planning except ConfigurationPathError: s = prev_state r = -0.1 t = False # Handling wrong action for inverse Jacobian except InvalidActionError: s = prev_state r = -0.01 t = False # Get bounding box centroids x, y, z = self.task._scene._workspace.get_position() # Set bounding box limits minx = x - 0.25 maxx = x + 0.25 miny = y - 0.35 maxy = y + 0.35 minz = z maxz = z + 0.5 bounding_box = [minx, maxx, miny, maxy, minz, maxz] # Get gripper position gripper_pose = s.gripper_pose # Reward for being in the bounding box if (self.bb_check(bounding_box, gripper_pose)): r += 0.1 return s, r, t # Check if gripper in the bounding box def bb_check(self, bounding_box, gripper_pose): out = True if (gripper_pose[0] < bounding_box[0] or gripper_pose[0] > bounding_box[1]): out = False if (gripper_pose[1] < bounding_box[2] or gripper_pose[1] > bounding_box[3]): out = False if (gripper_pose[2] < bounding_box[4] or gripper_pose[2] > bounding_box[5]): out = False return out @staticmethod def n_actions(): return 6 def shutdown(self): print("Shutdown") self.env.shutdown() def __del__(self): print("Shutdown") self.env.shutdown()
class RLBenchEnv(): """ make RLBench env to have same interfaces as openai.gym """ def __init__( self, task_name: str, state_type: list = 'state', ): # render_mode=None): """ create RL Bench environment :param task_name: task names can be found in rlbench.tasks :param state_type: state or vision or a sub list of state_types list like ['left_shoulder_rgb'] """ if state_type == 'state' or state_type == 'vision' or isinstance( state_type, list): self._state_type = state_type else: raise ValueError( 'State type value error, your value is {}'.format(state_type)) # self._render_mode = render_mode self._render_mode = None obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=True) self.env.launch() try: self.task = self.env.get_task( getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() self.spec = Spec(task_name) if self._state_type == 'state': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) elif self._state_type == 'vision': space_dict = OrderedDict() space_dict["state"] = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) for i in [ "left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb" ]: space_dict[i] = spaces.Box(low=0, high=1, shape=getattr(obs, i).shape) self.observation_space = spaces.Dict(space_dict) else: space_dict = OrderedDict() for name in self._state_type: if name.split('_')[-1] in ('rgb'): space_dict[name] = spaces.Box(low=0, high=1, shape=getattr(obs, name).shape) elif name.split('_')[-1] in ('depth', 'mask'): space_dict[name] = spaces.Box(low=0, high=1, shape=(128, 128, 3)) print(space_dict[name].shape) else: space_dict[name] = spaces.Box(low=-np.inf, high=np.inf, shape=getattr(obs, name).shape) self.observation_space = spaces.Dict(space_dict) self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size, ), dtype=np.float32) # if render_mode is not None: # # Add the camera to the scene # cam_placeholder = Dummy('cam_cinematic_placeholder') # self._gym_cam = VisionSensor.create([640, 360]) # self._gym_cam.set_pose(cam_placeholder.get_pose()) # if render_mode == 'human': # self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) # else: # self._gym_cam.set_render_mode(RenderMode.OPENGL3) def _extract_obs(self, obs): if self._state_type == 'state': return np.array(obs.get_low_dim_data(), np.float32) elif self._state_type == 'vision': return np.array([ np.array(obs.get_low_dim_data(), np.float32), np.array(obs.left_shoulder_rgb, np.float32), np.array(obs.right_shoulder_rgb, np.float32), np.array(obs.wrist_rgb, np.float32), np.array(obs.front_rgb, np.float32), ]) else: result = [] for name in self._state_type: if name.split('_')[-1] in ('mask', 'depth'): a = np.expand_dims(np.array(getattr(obs, name), np.float32), axis=-1) a = np.concatenate( (a, np.zeros((128, 128, 2), dtype=a.dtype)), axis=2) result.append(a) else: result.append(np.array(getattr(obs, name), np.float32)) return np.array(result) def seed(self, seed_value): # set seed as in openai.gym env pass def render(self, mode='human'): # todo render available at any time if self._render_mode is None: self._render_mode = mode # Add the camera to the scene cam_placeholder = Dummy('cam_cinematic_placeholder') self._gym_cam = VisionSensor.create([640, 360]) self._gym_cam.set_pose(cam_placeholder.get_pose()) if mode == 'human': self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) else: self._gym_cam.set_render_mode(RenderMode.OPENGL3) if mode != self._render_mode: raise ValueError( 'The render mode must match the render mode selected in the ' 'constructor. \nI.e. if you want "human" render mode, then ' 'create the env by calling: ' 'gym.make("reach_target-state-v0", render_mode="human").\n' 'You passed in mode %s, but expected %s.' % (mode, self._render_mode)) if mode == 'rgb_array': return self._gym_cam.capture_rgb() def reset(self): descriptions, obs = self.task.reset() return self._extract_obs(obs) def step(self, action): obs, reward, terminate = self.task.step(action) """ img = Image.fromarray(np.uint8(np.array(obs.left_shoulder_rgb, np.float32)*255), 'RGB') img.show() a = np.array(obs.left_shoulder_mask, np.float32) img = Image.fromarray(np.uint8(a*255), 'L') img.show() a = np.array(obs.left_shoulder_depth, np.float32) img = Image.fromarray(np.uint8(a*255), 'L') img.show() input('> ') """ return self._extract_obs(obs), reward, terminate, None def close(self): self.env.shutdown()
class StateMachine(object): def __init__(self): self.env = None self.home = None self.task = None self.sensor = None self.objs_dict = None def initialize(self, headless=False): DATASET = '' obs_config = ObservationConfig() obs_config.set_all(True) obs_config.left_shoulder_camera.rgb = True obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) self.env = Environment(action_mode, DATASET, obs_config, headless=headless) self.sensor = NoisyObjectPoseSensor(self.env) self.env.launch() self.task = self.env.get_task(EmptyContainer) self.task.reset() self.home = self.get_objects(False)['large_container'].get_pose() self.home[2] += 0.3 # demos = task.get_demos(3, live_demos=live_demos) self.objs = self.get_objects() self.movable_objects = ['Shape', 'Shape1', 'Shape3'] self.target_bins = ['small_container0'] self.start_bins = ['large_container'] self.max_retry = 5 self.env._robot.gripper.set_joint_forces([50, 50]) def get_objects(self, graspable=False): if graspable: objs = self.task._task.get_graspable_objects() objs_dict = {} for item in objs: name = item.get_name() objs_dict[name] = item return objs_dict else: objs = self.env._scene._active_task.get_base().\ get_objects_in_tree(exclude_base=True, first_generation_only=False) objs_dict = {} for obj in objs: name = obj.get_name() objs_dict[name] = obj return objs_dict # Move above object def move_to(self, pose, pad=0.05, ignore_collisions=True, quat=np.array([0, 1, 0, 0])): target_pose = np.copy(pose) obs = self.env._scene.get_observation() init_pose = obs.gripper_pose obs = self.env._scene.get_observation() init_pose = obs.gripper_pose target_pose[2] += pad path = self.env._robot.arm.get_path(np.array(target_pose[0:3]), quaternion=quat, trials=1000, ignore_collisions=True, algorithm=Algos.RRTConnect) # TODO catch errors and deal with situations when path not found return path def grasp(self, obj): # open the grippers cond = False while not cond: cond = self.env._robot.gripper.actuate(1, 0.1) self.env._scene.step() cond = False # now close while not cond: cond = self.env._robot.gripper.actuate(0, 0.1) self.env._scene.step() cond = self.env._robot.gripper.grasp(obj) return cond def release(self, obj): cond = False while not cond: cond = self.env._robot.gripper.actuate(1, 0.1) self.env._scene.step() self.env._robot.gripper.release() def execute(self, path): done = False path.set_to_start() while not done: done = path.step() # a = path.visualize() self.env._scene.step() return done def go_to(self, pose, pad=0.05, quat=np.array([0, 1, 0, 0]), gripper_close=False): pose_cp = copy.copy(pose) pose_cp[2] += pad pose_cp[3:] = quat wp = pose_cp.tolist() + [1] if gripper_close: wp = pose_cp.tolist() + [0] try: self.task.step(wp) except: print("Retrying with normal path planner") path = self.move_to(pose, pad, True, quat) self.execute(path) return def reset(self): self.task.reset() def is_within(self, obj1, obj2): #whether obj2 is within obj1 obj1_bbox = obj1.get_bounding_box() obj1_mat = np.array(obj1.get_matrix()).reshape(3, 4) obj2_bbox = obj2.get_bounding_box() obj2_mat = np.array(obj2.get_matrix()).reshape(3, 4) obj1_rect = get_edge_points(obj1_bbox, obj1_mat) obj2_rect = get_edge_points(obj2_bbox, obj2_mat) return contains(obj1_rect, obj2_rect) def picking_bin_empty(self): ''' Returns whether the picking bin is empty ''' self.objs_dict = machine.get_objects() bin_obj = self.objs_dict['large_container'] #?Which one for obj_name in self.objs_dict.keys(): if (not ('container' in obj_name)): if self.is_within(bin_obj, self.objs_dict[obj_name]): return False return True def placing_bin_full(self): ''' Returns whether the placing bin is full ''' self.objs_dict = machine.get_objects() bin_obj = self.objs_dict['small_container1'] #?Which one for obj_name in self.objs_dict.keys(): if (not ('container' in obj_name)): if not (self.is_within(bin_obj, self.objs_dict[obj_name])): return False return True def get_all_shapes(self): objects = [] for name in self.objs: if "Shape" in name: objects.append(name) print(objects) return objects def get_grasp_pose(self, theta): q = quaternion(0, 1, 0, 0) cos = np.cos(theta / 2) sin = np.sin(theta / 2) p = quaternion(cos, 0, 0, sin) rot_qt = p * q print("Theta", rot_qt, cos, sin, theta / 2) rot_arr = qn.as_float_array(rot_qt) rot_qt = [rot_arr[1], rot_arr[2], rot_arr[3], rot_arr[0]] return rot_qt def get_full_grasp_pose(self, thetas): thetax = thetas[0] cos = np.cos(thetax / 2) sin = np.sin(thetax / 2) qx = quaternion(cos, sin, 0, 0) thetay = thetas[1] cos = np.cos(thetay / 2) sin = np.sin(thetay / 2) qy = quaternion(cos, 0, sin, 0) thetaz = thetas[2] cos = np.cos(thetaz / 2) sin = np.sin(thetaz / 2) qz = quaternion(cos, 0, 0, sin) q_init = quaternion(0, 1, 0, 0) r = q_init * qz * qx * qy r_arr = qn.as_float_array(r) quat_vec = np.array([r_arr[1], r_arr[2], r_arr[3], r_arr[0]]) return quat_vec def move_objects_to_target(self, target_bins, start_bins): target_bin = target_bins[0] start_bin = start_bins[0] start_bin_pose = self.objs[start_bin].get_pose() start_bin_pose[2] += 0.3 target_bin_pose = self.objs[target_bin].get_pose() target_bin_pose[2] += 0.3 ''' move_objs = [] for obj in machine.movable_objects: if self.is_within(target_bin, obj): move_objs.append(obj) print(move_objs) ''' move_objs = machine.movable_objects for i, shape in enumerate(move_objs): cond = False retry_count = 0 while not cond and retry_count < self.max_retry: theta = 2 * np.pi * retry_count / self.max_retry quat = self.get_grasp_pose(theta) # go back to home position machine.go_to(start_bin_pose, 0, gripper_close=False) # move above object objs_poses = machine.sensor.get_poses() # pose=objs[shape].get_pose() pose = objs_poses[shape] machine.go_to(pose, 0, quat=quat, gripper_close=False) # grasp the object cond = machine.grasp(self.objs[shape]) if not cond: retry_count += 1 print("retry count: ", retry_count) # move to home position machine.go_to(start_bin_pose, 0, gripper_close=True) print("Gripper joint forces", self.env._robot.gripper.get_joint_forces()) # move above small container objs_poses = machine.sensor.get_poses() pose = objs_poses[target_bin] pose[0] += (i * 0.04 - 0.04) machine.go_to(pose, 0.05, gripper_close=True) # release the object machine.release(self.objs[shape]) machine.go_to(machine.home, 0, gripper_close=False)
obs_config = ObservationConfig() obs_config.set_all(False) obs_config.left_shoulder_camera.rgb = True obs_config.right_shoulder_camera.rgb = True obs_config.wrist_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode, obs_config=obs_config, headless=False, robot_configuration='jaco') env.launch() task = env.get_task(ReachTarget) agent = Agent(action_size=7) # Jaco is 6DoF + 1 for gripper action training_steps = 120 episode_length = 40 obs = None for i in range(training_steps): if i % episode_length == 0: print('Reset Episode') descriptions, obs = task.reset() print(descriptions) action = agent.act(obs) print(action) obs, reward, terminate = task.step(action)
class RLBenchDataEnvGroup(DataEnvGroup): ''' DataEnvGroup for RLBench environment. + The observation space can be modified through `global_config.env_args` + Observation space: - 'state': proprioceptive feature : [37] + task_specific > robot joint - velocities, positions, forces > gripper - pose, joint-position, touch_forces > task_low_dim_state - RGB-D/RGB image : (128 x 128 by default) > 'left_shoulder_rgb', 'right_shoulder_rgb' > 'front_rgb', 'wrist_rgb' - NOTE : Better to use only one of the RGB obvs rather than all, saves a lot of time while env creation. - Refer: https://github.com/stepjam/RLBench/blob/20988254b773aae433146fff3624d8bcb9ed7330/rlbench/observation_config.py + The action spaces by default are joint velocities [7] and gripper actuations [1]. - Dimension : [8] ''' def __init__(self, get_episode_type=None): super(RLBenchDataEnvGroup, self).__init__(get_episode_type) assert self.env_name == 'RLBENCH' self.env_obj = None self.use_gym = self.config.env_args['use_gym'] self.observation_mode = self.config.env_args['observation_mode'].lower( ) self.left_obv_key = 'left_shoulder_rgb' self.right_obv_key = 'right_shoulder_rgb' self.wrist_obv_key = 'wrist_rgb' self.front_obv_key = 'front_rgb' if self.observation_mode not in ['vision', 'state' ] and not self.use_gym: self.config.env_args['vis_obv_key'] = self.observation_mode self.vis_obv_key = self.config.env_args['vis_obv_key'] self.dof_obv_key = 'state' self.env_action_key = 'joint_velocities' self.env_gripper_key = 'gripper_open' self.obs_space = { self.dof_obv_key: (37), self.left_obv_key: (128, 128, 3), self.right_obv_key: (128, 128, 3), self.wrist_obv_key: (128, 128, 3), self.front_obv_key: (128, 128, 3) } self.action_space, self.gripper_space = (7), (1) if self.config.env_args['combine_action_space']: self.action_space += self.gripper_space def get_env(self, task=None): task = task if task else self.config.env_type if self.use_gym: assert type( task ) == str # NOTE : When using gym, the task has to be represented as a sting. assert self.observation_mode in ['vision', 'state'] env = gym.make( task, observation_mode=self.config.env_args['observation_mode'], render_mode=self.config.env_args['render_mode']) self.env_obj = env else: obs_config = ObservationConfig() if self.observation_mode == 'vision': obs_config.set_all(True) elif self.observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) else: obs_config_dict = { self.left_obv_key: obs_config.left_shoulder_camera, self.right_obv_key: obs_config.right_shoulder_camera, self.wrist_obv_key: obs_config.wrist_camera, self.front_obv_key: obs_config.front_camera } assert self.observation_mode in obs_config_dict.keys() obs_config.set_all_high_dim(False) obs_config_dict[self.observation_mode].set_all(True) obs_config.set_all_low_dim(True) # TODO : Write code to change it from env_args action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env_obj = Environment(action_mode, obs_config=obs_config, headless=True) task = task if task else ReachTarget if type(task) == str: task = task.split('-')[0] task = self.env_obj._string_to_task(task) self.env_obj.launch() env = self.env_obj.get_task( task) # NOTE : `env` refered as `task` in RLBench docs. return env def _get_obs(self, obs, key): assert obs is not None and key is not None if type(obs) == tuple: obs = obs[1] if type(obs) == dict: return obs[key] elif type(obs) == Observation: if key == 'state': return obs.get_low_dim_data() return getattr(obs, key) def shutdown_env(self): if self.env_obj is None: print("Environment not created, call `.get_env()`") elif self.use_gym: self.env_obj.close() else: self.env_obj.shutdown() self.env_obj = None def teleoperate(self, demons_config, task=None): if self.config.env_args['keyboard_teleop']: raise NotImplementedError else: if self.env_obj is None or task is None: env = self.get_env(task) else: if type(task) == str: task = self.env_obj._string_to_task(task.split('-')[0]) env = self.env_obj.get_task(task) if self.use_gym: demos = env.task.get_demos(demons_config.n_runs, live_demos=True) else: demos = env.get_demos(demons_config.n_runs, live_demos=True) demos = np.array(demos).flatten() for i in range(demons_config.n_runs): sample = demos[i] if self.observation_mode != 'state': tr_vobvs = np.array([ self._get_obs(obs, self.vis_obv_key) for obs in sample ]) tr_dof = np.array([ self._get_obs(obs, self.dof_obv_key).flatten() for obs in sample ]) tr_actions = np.array([ self._get_obs(obs, self.env_action_key).flatten() for obs in sample ]) tr_gripper = np.array( [[self._get_obs(obs, self.env_gripper_key)] for obs in sample]) if self.config.env_args['combine_action_space']: tr_actions = np.concatenate((tr_actions, tr_gripper), axis=-1) print("Storing Trajectory") trajectory = {self.dof_obv_key: tr_dof, 'action': tr_actions} if self.observation_mode != 'state': trajectory.update({self.vis_obv_key: tr_vobvs}) store_trajectoy(trajectory, episode_type='teleop', task=task) self.shutdown_env() def random_trajectory(self, demons_config): env = self.get_env() obs = env.reset() tr_vobvs, tr_dof, tr_actions = [], [], [] for step in range(demons_config.flush_freq): if self.observation_mode != 'state': tr_vobvs.append(np.array(self._get_obs(obs, self.vis_obv_key))) tr_dof.append( np.array(self._get_obs(obs, self.dof_obv_key).flatten())) action = np.random.normal(size=self.action_space[0]) obs, reward, done, info = env.step(action) tr_actions.append(action) print("Storing Trajectory") trajectory = { self.dof_obv_key: np.array(tr_dof), 'action': np.array(tr_actions) } if self.observation_mode != 'state': trajectory.update({self.vis_obv_key: np.array(tr_vobvs)}) store_trajectoy(trajectory, episode_type='random') self.shutdown_env()
class RLBenchEnv(gym.Env): """An gym wrapper for RLBench.""" metadata = {'render.modes': ['human']} def __init__(self, task_class, observation_mode='state', action_mode='joint', multi_action_space=False, discrete_action_space=False): self.num_skip_control = 20 self.epi_obs = [] self.obs_record = True self.obs_record_id = None self._observation_mode = observation_mode self.obs_config = ObservationConfig() if observation_mode == 'state': self.obs_config.set_all_high_dim(False) self.obs_config.set_all_low_dim(True) elif observation_mode == 'state-goal': self.obs_config.set_all_high_dim(False) self.obs_config.set_all_low_dim(True) self.obs_config.set_goal_info(True) elif observation_mode == 'vision': self.obs_config.set_all(False) self.obs_config.set_camera_rgb(True) elif observation_mode == 'both': self.obs_config.set_all(True) else: raise ValueError('Unrecognised observation_mode: %s.' % observation_mode) self._action_mode = action_mode self.ac_config = None if action_mode == 'joint': self.ac_config = ActionConfig( SnakeRobotActionConfig.ABS_JOINT_POSITION) elif action_mode == 'trigon': self.ac_config = ActionConfig( SnakeRobotActionConfig.TRIGON_MODEL_PARAM, is_discrete=discrete_action_space) else: raise ValueError('Unrecognised action_mode: %s.' % action_mode) self.env = Environment(action_config=self.ac_config, obs_config=self.obs_config, headless=True) self.env.launch() self.task = self.env.get_task(task_class) self.max_episode_steps = self.task.episode_len _, obs = self.task.reset() if action_mode == 'joint': self.action_space = spaces.Box( low=-1.7, high=1.7, shape=(self.ac_config.action_size, ), dtype=np.float32) elif action_mode == 'trigon': if multi_action_space: # action_space1 = spaces.MultiBinary(n=1) low1 = np.array([-0.8, -0.8]) high1 = np.array([0.8, 0.8]) action_space1 = spaces.Box(low=low1, high=high1, dtype=np.float32) # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1]) # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1]) low2 = np.array([1.0]) high2 = np.array([3.0]) action_space2 = spaces.Box(low=low2, high=high2, dtype=np.float32) self.action_space = spaces.Tuple( (action_space1, action_space2)) elif discrete_action_space: self.action_space = spaces.MultiDiscrete([4, 4, 4]) else: # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1]) # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1]) low = np.array([-1, -1, -1]) high = np.array([1, 1, 1]) self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) if observation_mode == 'state' or observation_mode == 'state-goal': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=(obs.get_low_dim_data().shape[0] * self.num_skip_control, )) if observation_mode == 'state-goal': self.goal_dim = obs.get_goal_dim() elif observation_mode == 'vision': self.observation_space = spaces.Box( low=0, high=1, shape=obs.head_camera_rgb.shape) elif observation_mode == 'both': self.observation_space = spaces.Dict({ "state": spaces.Box(low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "rattler_eye_rgb": spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape), "rattler_eye_depth": spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape), }) self._gym_cam = None def _extract_obs(self, obs): if self._observation_mode == 'state': return obs.get_low_dim_data() elif self._observation_mode == 'state-goal': obs_goal = {'observation': obs.get_low_dim_data()} obs_goal.update(obs.get_goal_data()) return obs_goal elif self._observation_mode == 'vision': return obs.head_camera_rgb elif self._observation_mode == 'both': return { "state": obs.get_low_dim_data(), "rattler_eye_rgb": obs.head_camera_rgb, "rattle_eye_depth": obs.head_camera_depth, } def render(self, mode='human'): if self._gym_cam is None: # # Add the camera to the scene self._gym_cam = VisionSensor('monitor') self._gym_cam.set_resolution([640, 640]) self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED) def reset(self): obs_data_group = [] obs_data_dict = { 'observation': [], 'desired_goal': None, 'achieved_goal': None } descriptions, obs = self.task.reset() self.epi_obs.append(obs) obs_data = self._extract_obs(obs) for _ in range(self.num_skip_control): # obs_data_group.extend(obs_data) if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray): obs_data_group.extend(obs_data) elif isinstance(obs_data, dict): obs_data_dict['observation'].extend(obs_data['observation']) obs_data_dict['desired_goal'] = obs_data['desired_goal'] obs_data_dict['achieved_goal'] = obs_data['achieved_goal'] ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict del descriptions # Not used return ret_obs def step(self, action): obs_data_group = [] obs_data_dict = { 'observation': [], 'desired_goal': None, 'achieved_goal': None } reward_group = [] terminate = False for _ in range(self.num_skip_control): obs, reward, step_terminate, success = self.task.step(action) self.epi_obs.append(obs) obs_data = self._extract_obs(obs) if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray): obs_data_group.extend(obs_data) elif isinstance( obs_data, dict): # used for hierarchical reinforcement algorithm obs_data_dict['observation'].extend(obs_data['observation']) obs_data_dict['desired_goal'] = obs_data['desired_goal'] obs_data_dict['achieved_goal'] = obs_data['achieved_goal'] reward_group.append(reward) terminate |= step_terminate if terminate: if self.obs_record and success: # record a successful experience self.record_obs("RobotPos") self.epi_obs = [] break ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict return ret_obs, np.mean(reward_group), terminate, {} def close(self): self.env.shutdown() # def load_env_param(self): # self.env.load_env_param() def compute_reward(self, achieved_goal=None, desired_goal=None, info=None): assert achieved_goal is not None assert desired_goal is not None return self.task.compute_reward(achieved_goal, desired_goal) def record_obs(self, obs_part): if self.obs_record_id is None: record_filenames = glob.glob("./obs_record/obs_record_*.txt") record_filenames.sort(key=lambda filename: int( filename.split('_')[-1].split('.')[0])) if len(record_filenames) == 0: self.obs_record_id = 1 else: last_id = int( record_filenames[-1].split('_')[-1].split('.')[0]) self.obs_record_id = last_id + 1 else: self.obs_record_id += 1 filename = './obs_record/obs_record_' + str( self.obs_record_id) + '.txt' obs_record_file = open(filename, 'w') if obs_part == 'All': pass if obs_part == 'RobotPos': robot_pos_arr = [] for obs in self.epi_obs: robot_pos = obs.get_2d_robot_pos() robot_pos_arr.append(robot_pos) target_pos = self.task.get_goal() robot_pos_arr.append( target_pos) # The last line records the target position robot_pos_arr = np.array(robot_pos_arr) np.savetxt(obs_record_file, robot_pos_arr, fmt="%f") obs_record_file.close()
class GraspController: def __init__(self, action_mode, static_positions=True): # Initialize environment with Action mode and observations # Resize the write camera to fit the GQCNN wrist_camera = CameraConfig(image_size=(1032, 772)) self.env = Environment(action_mode, '', ObservationConfig(wrist_camera=wrist_camera), False, static_positions=static_positions) self.env.launch() # Load specified task into the environment self.task = self.env.get_task(EmptyContainer) def reset(self): descriptions, obs = self.task.reset() return descriptions, obs def get_objects(self, add_noise=False): objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False) objs_dict = {} for obj in objs: name = obj.get_name() pose = obj.get_pose() if add_noise: pose = noisy_object(pose) objs_dict[name] = [obj, pose] return objs_dict def get_path(self, pose, set_orientation=False): # TODO deal with situations when path not found if set_orientation: path = self.env._robot.arm.get_path(pose[:3], quaternion=pose[3:], ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000) else: path = self.env._robot.arm.get_path(pose[:3], quaternion=np.array([0, 1, 0, 0]), ignore_collisions=True, algorithm=Algos.RRTConnect, trials=1000) return path def grasp(self): # TODO get feedback to check if grasp is successfull done_grab_action = False # Repeat unitil successfully grab the object while not done_grab_action: # gradually close the gripper done_grab_action = self.env._robot.gripper.actuate(0, velocity=0.2) # 0 is close self.env._pyrep.step() # self.task._task.step() # self.env._scene.step() grasped_objects = {} obj_list = ['Shape', 'Shape1', 'Shape3'] objs = self.env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False) for obj in objs: if obj.get_name() in obj_list: grasped_objects[obj.get_name()] = self.env._robot.gripper.grasp(obj) return grasped_objects # return self.env._robot.gripper.get_grasped_objects() def release(self): done = False while not done: done = self.env._robot.gripper.actuate(1, velocity=0.2) # 1 is release self.env._pyrep.step() # self.task._task.step() # self.env._scene.step() self.env._robot.gripper.release() def execute_path(self, path, open_gripper=True): path = path._path_points.reshape(-1, path._num_joints) for i in range(len(path)): action = list(path[i]) + [int(open_gripper)] obs, reward, terminate = self.task.step(action) return obs, reward, terminate
class RLBenchEnv(object): """ make RLBench env to have same interfaces as openai.gym """ def __init__(self, task_name, state_type_list=None, headless=False): if state_type_list is None: state_type_list = ['left_shoulder_rgb'] obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=headless) self.env.launch() try: self.task = self.env.get_task(ReachTarget) except: raise NotImplementedError _, obs = self.task.reset() if len(state_type_list) > 0: self.observation_space = [] for state_type in state_type_list: state = getattr(obs, state_type) if isinstance(state, (int, float)): shape = (1, ) else: shape = state.shape self.observation_space.append( spaces.Box(low=-np.inf, high=np.inf, shape=shape)) else: raise ValueError('No State Type!') self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, ), dtype=np.float32) self.state_type_list = state_type_list def seed(self, seed_value): # set seed as in openai.gym env pass def render(self): # render the scene pass def _get_state(self, obs): if len(self.state_type_list) > 0: state = [] for state_type in self.state_type_list: if state_type in image_types: image = getattr(obs, state_type) image = np.moveaxis( image, 2, 0) # change (H, W, C) to (C, H, W) for torch state.append(image) elif state_type in primitive_types: state.append(np.array([getattr(obs, state_type)])) else: state.append(getattr(obs, state_type)) else: raise ValueError('State Type Not Exists!') return state def reset(self): descriptions, obs = self.task.reset() return self._get_state(obs) def step(self, action): obs, reward, terminate = self.task.step( action) # reward in original rlbench is binary for success or not # distance between current pos (tip) and target pos # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose() x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose() tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose() distance = (np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z)) reward -= 1. * distance # speed regulation joint_velocities = self.task._robot.arm.get_joint_velocities() velocities = np.inner(joint_velocities, joint_velocities) reward -= 0.05 / len(joint_velocities) * velocities # orientation gripper_x, gripper_y, gripper_z, gripper_qx, gripper_qy, gripper_qz, gripper_qw = self.task._robot.gripper.get_pose( ) v1 = np.array( (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z)) temp = np.sin(np.arccos(gripper_qw)) v2 = np.array( (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp)) orientation_angle = np.abs( np.arccos(v1.dot(v2) / np.sqrt(v1.dot(v1)) / np.sqrt(v2.dot(v2)))) # [0, pi] orientation_angle = orientation_angle if orientation_angle < np.pi / 2. else ( np.pi - orientation_angle) reward -= 0.5 / np.pi * orientation_angle # if orientation_angle > np.pi / 8. else 0. return self._get_state(obs), reward, terminate, None def close(self): self.env.shutdown()
from rlbench.action_modes.gripper_action_modes import Discrete from rlbench.environment import Environment from rlbench.observation_config import ObservationConfig from rlbench.tasks import CloseBox from rlbench.demo import Demo from mohou.types import RGBImage if __name__ == '__main__': obs_config = ObservationConfig() obs_config.set_all(True) env = Environment(action_mode=MoveArmThenGripper( arm_action_mode=JointPosition(), gripper_action_mode=Discrete()), obs_config=ObservationConfig(), headless=True) env.launch() task = env.get_task(CloseBox) task.reset() rgb_seq = [] for i in range(30): gripper = 0.0 action = np.ones(7) * 0.01 * i obs, _, _ = task.step(np.array(action.tolist() + [0])) rgb = RGBImage(obs.overhead_rgb) rgb_seq.append(rgb) clip = ImageSequenceClip([img.numpy() for img in rgb_seq], fps=50) clip.write_gif("tmp.gif", fps=50, loop=1)