def initialize(self, headless=False): DATASET = '' obs_config = ObservationConfig() obs_config.set_all(True) obs_config.left_shoulder_camera.rgb = True obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) self.env = Environment(action_mode, DATASET, obs_config, headless=headless) self.sensor = NoisyObjectPoseSensor(self.env) self.env.launch() self.task = self.env.get_task(EmptyContainer) self.task.reset() self.home = self.get_objects(False)['large_container'].get_pose() self.home[2] += 0.3 # demos = task.get_demos(3, live_demos=live_demos) self.objs = self.get_objects() self.movable_objects = ['Shape', 'Shape1', 'Shape3'] self.target_bins = ['small_container0'] self.start_bins = ['large_container'] self.max_retry = 5 self.env._robot.gripper.set_joint_forces([50, 50])
def __init__(self, task_name, state_type='left_shoulder_rgb'): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=False) self.env.launch() try: self.task = self.env.get_task( getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() state = getattr(obs, state_type) self.state_type = state_type self.spec = Spec(task_name) self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, ), dtype=np.float32) self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=state.shape)
def test_run_task_validator(self): for task_file in TASKS: test_name = task_file.split('.py')[0] with self.subTest(task=test_name): if test_name in FLAKY_TASKS: self.skipTest('Flaky task.') sim = PyRep() ttt_file = os.path.join( DIR_PATH, '..', '..', 'rlbench', TTT_FILE) sim.launch(ttt_file, headless=True) sim.step_ui() sim.set_simulation_timestep(50.0) sim.step_ui() sim.start() robot = Robot(Panda(), PandaGripper()) obs = ObservationConfig() obs.set_all(False) scene = Scene(sim, robot, obs) sim.start() task_class = task_file_to_task_class(task_file) active_task = task_class(sim, robot) try: task_smoke(active_task, scene, variation=-1, max_variations=2, success=0.25) except Exception as e: sim.stop() sim.shutdown() raise e sim.stop() sim.shutdown()
def __init__(self, task_name, state_type_list=['left_shoulder_rgb'], dataset_root='', observation_mode='state', headless=True): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self._observation_mode = observation_mode self.env = Environment( action_mode, dataset_root, obs_config=obs_config, headless=headless) # Dont need to call launch as task.get_task can launch env. self.env.launch() self.task = self.env.get_task(task_name) _, obs = self.task.reset() self.action_space = spaces.Box( low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32) # self.logger = logger.create_logger(__class__.__name__) # self.logger.propagate = 0 if len(state_type_list) > 0: self.observation_space = [] # for state_type in state_type_list: # state = getattr(obs, state_type) self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) else: raise ValueError('No State Type!') self.state_type_list = state_type_list
def __init__(self, task_name, state_type_list=None, headless=False): if state_type_list is None: state_type_list = ['left_shoulder_rgb'] obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=headless) self.env.launch() try: self.task = self.env.get_task(ReachTarget) except: raise NotImplementedError _, obs = self.task.reset() if len(state_type_list) > 0: self.observation_space = [] for state_type in state_type_list: state = getattr(obs, state_type) if isinstance(state, (int, float)): shape = (1, ) else: shape = state.shape self.observation_space.append( spaces.Box(low=-np.inf, high=np.inf, shape=shape)) else: raise ValueError('No State Type!') self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, ), dtype=np.float32) self.state_type_list = state_type_list
def __init__(self, task_class, observation_mode='state', render_mode: Union[None, str] = None): self._observation_mode = observation_mode self._render_mode = render_mode obs_config = ObservationConfig() if observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) elif observation_mode == 'vision': obs_config.set_all(True) else: raise ValueError('Unrecognised observation_mode: %s.' % observation_mode) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=True) self.env.launch() self.task = self.env.get_task(task_class) _, obs = self.task.reset() self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size, )) if observation_mode == 'state': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) elif observation_mode == 'vision': self.observation_space = spaces.Dict({ "state": spaces.Box(low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "left_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape), "right_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape), "wrist_rgb": spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape), "front_rgb": spaces.Box(low=0, high=1, shape=obs.front_rgb.shape), }) if render_mode is not None: # Add the camera to the scene cam_placeholder = Dummy('cam_cinematic_placeholder') self._gym_cam = VisionSensor.create([640, 360]) self._gym_cam.set_pose(cam_placeholder.get_pose()) if render_mode == 'human': self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED) else: self._gym_cam.set_render_mode(RenderMode.OPENGL3)
def get_task(self, task_class, arm_action_mode): obs_config = ObservationConfig() obs_config.set_all(False) obs_config.set_all_low_dim(True) obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode(arm_action_mode) self.env = Environment( action_mode, ASSET_DIR, obs_config, headless=True) self.env.launch() return self.env.get_task(task_class)
def get_task(self, randomize_every, frequency, visual_config=None, dynamics_config=None): cam_config = CameraConfig(render_mode=RenderMode.OPENGL) obs_config = ObservationConfig(right_shoulder_camera=cam_config) obs_config.set_all(False) obs_config.set_all_low_dim(True) obs_config.right_shoulder_camera.rgb = True mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = DomainRandomizationEnvironment( mode, ASSET_DIR, obs_config, True, True, randomize_every, frequency, visual_config, dynamics_config) self.env.launch() return self.env.get_task(OpenMicrowave)
def test_get_stored_demos_images_without_init_sim(self): obs_config = ObservationConfig() obs_config.set_all(False) obs_config.set_all_low_dim(True) obs_config.right_shoulder_camera.rgb = True action_mode = ActionMode() self.env = environment.Environment( action_mode, ASSET_DIR, obs_config, headless=True) demos = self.env.get_demos('reach_target', 2) self.assertEqual(len(demos), 2) self.assertGreater(len(demos[0]), 0) self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray) self.assertIsNone(demos[0][0].left_shoulder_rgb)
def __init__(self, task_name: str, state_type: list = 'state', ): # render_mode=None): """ create RL Bench environment :param task_name: task names can be found in rlbench.tasks :param state_type: state or vision or a sub list of state_types list like ['left_shoulder_rgb'] """ if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list): self._state_type = state_type else: raise ValueError('State type value error, your value is {}'.format(state_type)) # self._render_mode = render_mode self._render_mode = None obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment( action_mode, obs_config=obs_config, headless=True) self.env.launch() try: self.task = self.env.get_task(getattr(sys.modules[__name__], task_name)) except: raise NotImplementedError _, obs = self.task.reset() self.spec = Spec(task_name) if self._state_type == 'state': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) elif self._state_type == 'vision': space_dict = OrderedDict() space_dict["state"] = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) for i in ["left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb"]: space_dict[i] = spaces.Box( low=0, high=1, shape=getattr(obs, i).shape) self.observation_space = spaces.Dict(space_dict) else: space_dict = OrderedDict() for name in self._state_type: if name.split('_')[-1] in ('rgb', 'depth', 'mask'): space_dict[name] = spaces.Box( low=0, high=1, shape=getattr(obs, name).shape) else: space_dict[name] = spaces.Box( low=-np.inf, high=np.inf, shape=getattr(obs, name).shape) self.observation_space = spaces.Dict(space_dict) self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
def get_env(self, task=None): task = task if task else self.config.env_type if self.use_gym: assert type( task ) == str # NOTE : When using gym, the task has to be represented as a sting. assert self.observation_mode in ['vision', 'state'] env = gym.make( task, observation_mode=self.config.env_args['observation_mode'], render_mode=self.config.env_args['render_mode']) self.env_obj = env else: obs_config = ObservationConfig() if self.observation_mode == 'vision': obs_config.set_all(True) elif self.observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) else: obs_config_dict = { self.left_obv_key: obs_config.left_shoulder_camera, self.right_obv_key: obs_config.right_shoulder_camera, self.wrist_obv_key: obs_config.wrist_camera, self.front_obv_key: obs_config.front_camera } assert self.observation_mode in obs_config_dict.keys() obs_config.set_all_high_dim(False) obs_config_dict[self.observation_mode].set_all(True) obs_config.set_all_low_dim(True) # TODO : Write code to change it from env_args action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env_obj = Environment(action_mode, obs_config=obs_config, headless=True) task = task if task else ReachTarget if type(task) == str: task = task.split('-')[0] task = self.env_obj._string_to_task(task) self.env_obj.launch() env = self.env_obj.get_task( task) # NOTE : `env` refered as `task` in RLBench docs. return env
def __init__(self,\ action_mode=DEFAULT_ACTION_MODE,\ task=DEFAULT_TASK,\ dataset_root='',\ headless=True): obs_config = ObservationConfig() obs_config.set_all(True) action_mode = action_mode self.env = Environment( action_mode,dataset_root,obs_config=obs_config, headless=headless) # Dont need to call launch as task.get_task can launch env. self.task = self.env.get_task(task) _, obs = self.task.reset() self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32) self.logger = logger.create_logger(__class__.__name__) self.logger.propagate = 0
def __init__(self, h): #64x64 camera outputs cam = CameraConfig(image_size=(64, 64)) obs_config = ObservationConfig(left_shoulder_camera=cam, right_shoulder_camera=cam, wrist_camera=cam, front_camera=cam) obs_config.set_all(True) # delta EE control with motion planning action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME) #Inits self.env = Environment(action_mode, obs_config=obs_config, headless=h) self.env.launch() self.task = self.env.get_task(ReachTarget)
def __init__(self, task_class, observation_mode='state'): self._observation_mode = observation_mode obs_config = ObservationConfig() if observation_mode == 'state': obs_config.set_all_high_dim(False) obs_config.set_all_low_dim(True) elif observation_mode == 'vision': obs_config.set_all(True) else: raise ValueError('Unrecognised observation_mode: %s.' % observation_mode) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) self.env = Environment(action_mode, obs_config=obs_config, headless=True) self.env.launch() self.task = self.env.get_task(task_class) _, obs = self.task.reset() self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(action_mode.action_size, )) if observation_mode == 'state': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape) elif observation_mode == 'vision': self.observation_space = spaces.Dict({ "state": spaces.Box(low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "left_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape), "right_shoulder_rgb": spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape), "wrist_rgb": spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape), }) self._gym_cam = None
def main(argv): obs_config = ObservationConfig(record_gripper_closing=True) obs_config.set_all(False) vrc = rand_every = None frequency = 0 if FLAGS.domain_randomization: vrc = VisualRandomizationConfig(FLAGS.textures_path) rand_every = RandomizeEvery.TRANSITION frequency = 10 env = Environment(ActionMode(), obs_config=obs_config, randomize_every=rand_every, frequency=frequency, visual_randomization_config=vrc, headless=FLAGS.headless) env.launch() # Add the camera to the scene cam_placeholder = Dummy('cam_cinematic_placeholder') cam = VisionSensor.create(FLAGS.camera_resolution) cam.set_pose(cam_placeholder.get_pose()) cam.set_parent(cam_placeholder) cam_motion = CircleCameraMotion(cam, Dummy('cam_cinematic_base'), 0.005) tr = TaskRecorder(env, cam_motion, fps=30) if len(FLAGS.tasks) > 0: task_names = FLAGS.tasks else: task_names = [t.split('.py')[0] for t in os.listdir(TASKS_PATH) if t != '__init__.py' and t.endswith('.py')] task_classes = [task_file_to_task_class( task_file) for task_file in task_names] for i, (name, cls) in enumerate(zip(task_names, task_classes)): good = tr.record_task(cls) if FLAGS.individual and good: tr.save(os.path.join(FLAGS.save_dir, '%s.avi' % name)) if not FLAGS.individual: tr.save(os.path.join(FLAGS.save_dir, 'recorded_tasks.avi')) env.shutdown()
args = parser.parse_args() python_file = os.path.join(TASKS_PATH, args.task) if not os.path.isfile(python_file): raise RuntimeError('Could not find the task file: %s' % python_file) task_class = task_file_to_task_class(args.task) DIR_PATH = os.path.dirname(os.path.abspath(__file__)) sim = PyRep() ttt_file = os.path.join(DIR_PATH, '..', 'rlbench', TTT_FILE) sim.launch(ttt_file, headless=True) sim.step_ui() sim.set_simulation_timestep(0.005) sim.step_ui() sim.start() robot = Robot(Panda(), PandaGripper()) active_task = task_class(sim, robot) obs = ObservationConfig() obs.set_all(False) scene = Scene(sim, robot, obs) try: task_smoke(active_task, scene, variation=2) except TaskValidationError as e: sim.shutdown() raise e sim.shutdown() print('Validation successful!')
class RLBenchEnv(gym.Env): """An gym wrapper for RLBench.""" metadata = {'render.modes': ['human']} def __init__(self, task_class, observation_mode='state', action_mode='joint', multi_action_space=False, discrete_action_space=False): self.num_skip_control = 20 self.epi_obs = [] self.obs_record = True self.obs_record_id = None self._observation_mode = observation_mode self.obs_config = ObservationConfig() if observation_mode == 'state': self.obs_config.set_all_high_dim(False) self.obs_config.set_all_low_dim(True) elif observation_mode == 'state-goal': self.obs_config.set_all_high_dim(False) self.obs_config.set_all_low_dim(True) self.obs_config.set_goal_info(True) elif observation_mode == 'vision': self.obs_config.set_all(False) self.obs_config.set_camera_rgb(True) elif observation_mode == 'both': self.obs_config.set_all(True) else: raise ValueError('Unrecognised observation_mode: %s.' % observation_mode) self._action_mode = action_mode self.ac_config = None if action_mode == 'joint': self.ac_config = ActionConfig( SnakeRobotActionConfig.ABS_JOINT_POSITION) elif action_mode == 'trigon': self.ac_config = ActionConfig( SnakeRobotActionConfig.TRIGON_MODEL_PARAM, is_discrete=discrete_action_space) else: raise ValueError('Unrecognised action_mode: %s.' % action_mode) self.env = Environment(action_config=self.ac_config, obs_config=self.obs_config, headless=True) self.env.launch() self.task = self.env.get_task(task_class) self.max_episode_steps = self.task.episode_len _, obs = self.task.reset() if action_mode == 'joint': self.action_space = spaces.Box( low=-1.7, high=1.7, shape=(self.ac_config.action_size, ), dtype=np.float32) elif action_mode == 'trigon': if multi_action_space: # action_space1 = spaces.MultiBinary(n=1) low1 = np.array([-0.8, -0.8]) high1 = np.array([0.8, 0.8]) action_space1 = spaces.Box(low=low1, high=high1, dtype=np.float32) # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1]) # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1]) low2 = np.array([1.0]) high2 = np.array([3.0]) action_space2 = spaces.Box(low=low2, high=high2, dtype=np.float32) self.action_space = spaces.Tuple( (action_space1, action_space2)) elif discrete_action_space: self.action_space = spaces.MultiDiscrete([4, 4, 4]) else: # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1]) # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1]) low = np.array([-1, -1, -1]) high = np.array([1, 1, 1]) self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) if observation_mode == 'state' or observation_mode == 'state-goal': self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=(obs.get_low_dim_data().shape[0] * self.num_skip_control, )) if observation_mode == 'state-goal': self.goal_dim = obs.get_goal_dim() elif observation_mode == 'vision': self.observation_space = spaces.Box( low=0, high=1, shape=obs.head_camera_rgb.shape) elif observation_mode == 'both': self.observation_space = spaces.Dict({ "state": spaces.Box(low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape), "rattler_eye_rgb": spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape), "rattler_eye_depth": spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape), }) self._gym_cam = None def _extract_obs(self, obs): if self._observation_mode == 'state': return obs.get_low_dim_data() elif self._observation_mode == 'state-goal': obs_goal = {'observation': obs.get_low_dim_data()} obs_goal.update(obs.get_goal_data()) return obs_goal elif self._observation_mode == 'vision': return obs.head_camera_rgb elif self._observation_mode == 'both': return { "state": obs.get_low_dim_data(), "rattler_eye_rgb": obs.head_camera_rgb, "rattle_eye_depth": obs.head_camera_depth, } def render(self, mode='human'): if self._gym_cam is None: # # Add the camera to the scene self._gym_cam = VisionSensor('monitor') self._gym_cam.set_resolution([640, 640]) self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED) def reset(self): obs_data_group = [] obs_data_dict = { 'observation': [], 'desired_goal': None, 'achieved_goal': None } descriptions, obs = self.task.reset() self.epi_obs.append(obs) obs_data = self._extract_obs(obs) for _ in range(self.num_skip_control): # obs_data_group.extend(obs_data) if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray): obs_data_group.extend(obs_data) elif isinstance(obs_data, dict): obs_data_dict['observation'].extend(obs_data['observation']) obs_data_dict['desired_goal'] = obs_data['desired_goal'] obs_data_dict['achieved_goal'] = obs_data['achieved_goal'] ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict del descriptions # Not used return ret_obs def step(self, action): obs_data_group = [] obs_data_dict = { 'observation': [], 'desired_goal': None, 'achieved_goal': None } reward_group = [] terminate = False for _ in range(self.num_skip_control): obs, reward, step_terminate, success = self.task.step(action) self.epi_obs.append(obs) obs_data = self._extract_obs(obs) if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray): obs_data_group.extend(obs_data) elif isinstance( obs_data, dict): # used for hierarchical reinforcement algorithm obs_data_dict['observation'].extend(obs_data['observation']) obs_data_dict['desired_goal'] = obs_data['desired_goal'] obs_data_dict['achieved_goal'] = obs_data['achieved_goal'] reward_group.append(reward) terminate |= step_terminate if terminate: if self.obs_record and success: # record a successful experience self.record_obs("RobotPos") self.epi_obs = [] break ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict return ret_obs, np.mean(reward_group), terminate, {} def close(self): self.env.shutdown() # def load_env_param(self): # self.env.load_env_param() def compute_reward(self, achieved_goal=None, desired_goal=None, info=None): assert achieved_goal is not None assert desired_goal is not None return self.task.compute_reward(achieved_goal, desired_goal) def record_obs(self, obs_part): if self.obs_record_id is None: record_filenames = glob.glob("./obs_record/obs_record_*.txt") record_filenames.sort(key=lambda filename: int( filename.split('_')[-1].split('.')[0])) if len(record_filenames) == 0: self.obs_record_id = 1 else: last_id = int( record_filenames[-1].split('_')[-1].split('.')[0]) self.obs_record_id = last_id + 1 else: self.obs_record_id += 1 filename = './obs_record/obs_record_' + str( self.obs_record_id) + '.txt' obs_record_file = open(filename, 'w') if obs_part == 'All': pass if obs_part == 'RobotPos': robot_pos_arr = [] for obs in self.epi_obs: robot_pos = obs.get_2d_robot_pos() robot_pos_arr.append(robot_pos) target_pos = self.task.get_goal() robot_pos_arr.append( target_pos) # The last line records the target position robot_pos_arr = np.array(robot_pos_arr) np.savetxt(obs_record_file, robot_pos_arr, fmt="%f") obs_record_file.close()
from rlbench.action_modes import ArmActionMode, ActionMode from rlbench.observation_config import ObservationConfig from rlbench.tasks import ReachTarget import numpy as np class Agent(object): def __init__(self, action_size): self.action_size = action_size def act(self, obs): return (np.random.normal(0.0, 0.1, size=(self.action_size, ))).tolist() obs_config = ObservationConfig() obs_config.set_all(False) obs_config.left_shoulder_camera.rgb = True obs_config.right_shoulder_camera.rgb = True obs_config.wrist_camera.rgb = True action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode, obs_config=obs_config, headless=False, robot_configuration='jaco') env.launch() task = env.get_task(ReachTarget) agent = Agent(action_size=7) # Jaco is 6DoF + 1 for gripper action
class ImitationLearning(object): def predict_action(self, batch): return np.random.uniform(size=(len(batch), 7)) def behaviour_cloning_loss(self, ground_truth_actions, predicted_actions): return 1 # To use 'saved' demos, set the path below, and set live_demos=False live_demos = True DATASET = '' if live_demos else 'PATH/TO/YOUR/DATASET' obs_config = ObservationConfig() obs_config.set_all(True) action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY) env = Environment(action_mode, DATASET, obs_config, False) env.launch() task = env.get_task(ReachTarget) il = ImitationLearning() demos = task.get_demos(2, live_demos=live_demos) # -> List[List[Observation]] demos = np.array(demos).flatten() # An example of using the demos to 'train' using behaviour cloning loss. for i in range(100): print("'training' iteration %d" % i)
class FakeRLBenchEnv(Environment): ROBOT_NAME = SUPPORTED_ROBOTS.keys() OBSERVATION_MODE = ("state", "vision", "all") ACTION_MODE = { "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY, "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY, "joint position": ArmActionMode.ABS_JOINT_POSITION, "delta joint position": ArmActionMode.DELTA_JOINT_POSITION, "joint torque": ArmActionMode.ABS_JOINT_TORQUE, "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE, "effector velocity": ArmActionMode.ABS_EE_VELOCITY, "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY, "effector position": ArmActionMode.ABS_EE_POSE, "delta effector position": ArmActionMode.DELTA_EE_POSE } def __init__(self, task_name: str, observation_mode: str = "state", action_mode: str = "delta joint position", robot_name: str = "panda"): super(FakeRLBenchEnv, self).__init__(task_name) if task_name not in all_class_names(tasks): raise KeyError(f"Error: unknown task name {task_name}") if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE: raise KeyError( f"Error: unknown observation mode {observation_mode}, available: {FakeRLBenchEnv.OBSERVATION_MODE}" ) if action_mode not in FakeRLBenchEnv.ACTION_MODE: raise KeyError( f"Error: unknown action mode {action_mode}, available: {FakeRLBenchEnv.ACTION_MODE.keys()}" ) if robot_name not in FakeRLBenchEnv.ROBOT_NAME: raise KeyError( f"Error: unknown robot name {robot_name}, available: {FakeRLBenchEnv.ROBOT_NAME}" ) # TODO: modify the task/robot/arm/gripper to support early instantiation before v-rep launched self._observation_mode = observation_mode self._action_mode = action_mode self._task_name = task_name self._robot_name = robot_name self._observation_config = ObservationConfig() if self._observation_mode == "state": self._observation_config.set_all_low_dim(True) self._observation_config.set_all_high_dim(False) elif self._observation_mode == "vision": self._observation_config.set_all_low_dim(False) self._observation_config.set_all_high_dim(True) elif self._observation_mode == "all": self._observation_config.set_all(True) self._action_config = ActionMode( FakeRLBenchEnv.ACTION_MODE[self._action_mode]) self.env = None self.task = None self._update_info_dict() def init(self, display=False): with suppress_stdout(): self.env = RLEnvironment(action_mode=self._action_config, obs_config=self._observation_config, headless=not display, robot_configuration=self._robot_name) self.env.launch() self.task = self.env.get_task( get_named_class(self._task_name, tasks)) def reset(self, random: bool = True) -> StepDict: if not random: np.random.seed(0) self.task._static_positions = not random descriptions, obs = self.task.reset() # Returns a list of descriptions and the first observation next_step = {"opt": descriptions} if self._observation_mode == "state" or self._observation_mode == "all": next_step['s'] = obs.get_low_dim_data() if self._observation_mode == "vision" or self._observation_mode == "all": next_step["left shoulder rgb"] = obs.left_shoulder_rgb next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb next_step["wrist_rgb"] = obs.wrist_rgb return next_step def step(self, last_step: StepDict) -> (StepDict, bool): assert 'a' in last_step, "Key 'a' for action not in last_step, maybe you passed a wrong dict ?" obs, reward, terminate = self.task.step(last_step['a']) last_step['r'] = reward last_step["info"] = {} next_step = {"opt": None} if self._observation_mode == "state" or self._observation_mode == "all": next_step['s'] = obs.get_low_dim_data() if self._observation_mode == "vision" or self._observation_mode == "all": next_step["left shoulder rgb"] = obs.left_shoulder_rgb next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb next_step["wrist_rgb"] = obs.wrist_rgb return last_step, next_step, terminate def finalize(self) -> bool: with suppress_stdout(): self.env.shutdown() self.task = None self.env = None return True def name(self) -> str: return self._task_name # ------------- private methods ------------- # def _update_info_dict(self): # update info dict self._info["action mode"] = self._action_mode self._info["observation mode"] = self._observation_mode # TODO: action dim should related to robot, not action mode, here we fixed it temporally self._info["action dim"] = (self._action_config.action_size, ) self._info["action low"] = -np.ones(self._info["action dim"], dtype=np.float32) self._info["action high"] = np.ones(self._info["action dim"], dtype=np.float32) if self._observation_mode == "state" or self._observation_mode == "all": # TODO: observation should be determined without init the entire environment with suppress_stdout(): env = RLEnvironment(action_mode=self._action_config, obs_config=self._observation_config, headless=True, robot_configuration=self._robot_name) env.launch() task = env.get_task(get_named_class(self._task_name, tasks)) _, obs = task.reset() env.shutdown() del task del env self._info["time step"] = _DT self._info["state dim"] = tuple(obs.get_low_dim_data().shape) self._info["state low"] = np.ones(self._info["state dim"], dtype=np.float32) * -np.inf self._info["state high"] = np.ones(self._info["state dim"], dtype=np.float32) * np.inf if self._observation_mode == "vision" or self._observation_mode == "all": self._info["left shoulder rgb dim"] = tuple( self._observation_config.left_shoulder_camera.image_size) + ( 3, ) self._info["left shoulder rgb low"] = np.zeros( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["left shoulder rgb high"] = np.ones( self._info["left shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb dim"] = tuple( self._observation_config.right_shoulder_camera.image_size) + ( 3, ) self._info["right shoulder rgb low"] = np.zeros( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["right shoulder rgb high"] = np.ones( self._info["right shoulder rgb dim"], dtype=np.float32) self._info["wrist rgb dim"] = tuple( self._observation_config.wrist_camera.image_size) + (3, ) self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"], dtype=np.float32) self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"], dtype=np.float32) self._info["reward low"] = -np.inf self._info["reward high"] = np.inf def live_demo(self, amount: int, random: bool = True) -> SampleBatch: """ :param amount: number of demonstration trajectories to be generated :param random: if the starting position is random :return: observation list : [amount x [(steps-1) x [s, a] + [s_term, None]]], WARNING: that the action here is calculated from observation, when executing, they may cause some inaccuracy """ seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)] self.task._static_positions = not random demo_pack = [] for seed in seeds: np.random.seed(seed) pack = self.task.get_demos(1, True)[0] demo_traj = [] np.random.seed(seed) desc, obs = self.task.reset() v_tar = 0. for o_tar in pack[1:]: action = [] if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY: action.extend( (o_tar.joint_positions - obs.joint_positions) / _DT) elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION: action.extend(o_tar.joint_positions) elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE: action.extend(o_tar.joint_forces) raise TypeError( "Warning, abs_joint_torque is not currently supported") elif self._action_config.arm == ArmActionMode.ABS_EE_POSE: action.extend(o_tar.gripper_pose) elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY: # WARNING: This calculating method is not so accurate since rotation cannot be directed 'add' together # since the original RLBench decides to do so, we should follow it action.extend( (o_tar.gripper_pose - obs.gripper_pose) / _DT) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY: v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT action.extend(v_tar - obs.joint_velocities) raise TypeError( "Warning, delta_joint_velocity is not currently supported" ) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION: action.extend(o_tar.joint_positions - obs.joint_positions) elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE: action.extend(o_tar.joint_forces - obs.joint_forces) raise TypeError( "Warning, delta_joint_torque is not currently supported" ) elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE: action.extend(o_tar.gripper_pose[:3] - obs.gripper_pose[:3]) q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion( obs.gripper_pose[3:7]).conjugate action.extend(list(q)) elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY: # WARNING: This calculating method is not so accurate since rotation cannot be directed 'add' together # since the original RLBench decides to do so, we should follow it v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT action.extend(v_tar_new - v_tar) v_tar = v_tar_new raise TypeError( "Warning, delta_ee_velocity is not currently supported" ) action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0) action = np.asarray(action, dtype=np.float32) demo_traj.append({ 'observation': obs, 'a': action, 's': obs.get_low_dim_data() }) obs, reward, done = self.task.step(action) demo_traj[-1]['r'] = reward demo_pack.append(demo_traj) return { "trajectory": demo_pack, "config": "default", "policy": "hand-coding", "env class": self.__class__.__name__, "env name": self._task_name, "env config": "default", "observation config": self._observation_mode, "robot config": self._robot_name, "action config": self._action_mode }