def __init__(self, task_name, state_type_list=None, dataset_root='',
             observation_mode='state', headless=True):
    # Avoid a mutable default argument.
    if state_type_list is None:
        state_type_list = ['left_shoulder_rgb']
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self._observation_mode = observation_mode
    self.env = Environment(
        action_mode, dataset_root, obs_config=obs_config, headless=headless)
    self.env.launch()
    self.task = self.env.get_task(task_name)
    _, obs = self.task.reset()
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
    if len(state_type_list) > 0:
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
    else:
        raise ValueError('No State Type!')
    self.state_type_list = state_type_list
class RLBenchEnv():
    """Wrap RLBench so it exposes the same interface as openai.gym."""

    def __init__(self, task_name, state_type_list=None):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError(
                'Task %s is not imported in this module.' % task_name)
        _, obs = self.task.reset()
        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=state.shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)
        self.state_type_list = state_type_list
        self.spec = Spec(task_name)

    def seed(self, seed_value):
        # Set the seed as in an openai.gym env (no-op here).
        pass

    def render(self):
        # Rendering is handled by the CoppeliaSim window itself.
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    # Change (H, W, C) to (C, H, W) for torch.
                    image = np.moveaxis(image, 2, 0)
                    state.append(image)
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # Reward in original RLBench is binary (success or not).
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate, None

    def close(self):
        self.env.shutdown()
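# --- Usage sketch (not part of the original wrapper) ------------------------
# A minimal rollout against the wrapper above. Assumes 'ReachTarget' is one of
# the task classes imported into this module, since __init__ looks the task up
# by name via getattr(sys.modules[__name__], task_name).
if __name__ == '__main__':
    env = RLBenchEnv('ReachTarget', state_type_list=['left_shoulder_rgb'])
    state = env.reset()
    for _ in range(100):
        action = env.action_space.sample()
        state, reward, terminate, _ = env.step(action)
        if terminate:
            state = env.reset()
    env.close()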
def initialize(self, headless=False):
    DATASET = ''
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    obs_config.left_shoulder_camera.rgb = True
    obs_config.right_shoulder_camera.rgb = True
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    self.env = Environment(action_mode, DATASET, obs_config, headless=headless)
    self.sensor = NoisyObjectPoseSensor(self.env)
    self.env.launch()
    self.task = self.env.get_task(EmptyContainer)
    self.task.reset()
    self.home = self.get_objects(False)['large_container'].get_pose()
    self.home[2] += 0.3
    # demos = task.get_demos(3, live_demos=live_demos)
    self.objs = self.get_objects()
    self.movable_objects = ['Shape', 'Shape1', 'Shape3']
    self.target_bins = ['small_container0']
    self.start_bins = ['large_container']
    self.max_retry = 5
    self.env._robot.gripper.set_joint_forces([50, 50])
def __init__(self, task_name, state_type='left_shoulder_rgb'):
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self.env = Environment(action_mode, obs_config=obs_config, headless=False)
    self.env.launch()
    try:
        self.task = self.env.get_task(
            getattr(sys.modules[__name__], task_name))
    except AttributeError:
        raise NotImplementedError(
            'Task %s is not imported in this module.' % task_name)
    _, obs = self.task.reset()
    state = getattr(obs, state_type)
    self.state_type = state_type
    self.spec = Spec(task_name)
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(action_mode.action_size,),
        dtype=np.float32)
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=state.shape)
def collect_demos(env: Environment, task: Task, taskname,
                  n_iter_per_var=50, n_demos_per_iter=1000):
    env.launch()
    task_env = env.get_task(task)
    for variation_index in range(1):  # or range(task_env.variation_count())
        task_env.set_variation(variation_index)
        description, _ = task_env.reset()
        # Collect and save demos.
        start = time.time()
        for i in range(n_iter_per_var):
            # Fixed seed: every iteration replays the same demonstrations.
            np.random.seed(6)
            demos = task_env.get_demos(n_demos_per_iter, live_demos=True,
                                       max_attempts=1)
            demos = np.array(demos).flatten()
            print(demos.shape)
            save_to_hdf5(demos, taskname, description, variation_index)
        print("TIME: ", time.time() - start)
    # env.shutdown()
    return demos
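# --- Usage sketch (assumptions: ReachTarget task, joint-velocity control) ---
# Drives collect_demos above with a small demo budget; the observation and
# action configuration here are illustrative, not from the original snippet.
if __name__ == '__main__':
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(action_mode, obs_config=obs_config, headless=True)
    collect_demos(env, ReachTarget, 'reach_target',
                  n_iter_per_var=1, n_demos_per_iter=10)
    env.shutdown()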
def __init__(self, task_name, state_type_list=None, headless=False):
    if state_type_list is None:
        state_type_list = ['left_shoulder_rgb']
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self.env = Environment(action_mode, obs_config=obs_config,
                           headless=headless)
    self.env.launch()
    try:
        self.task = self.env.get_task(ReachTarget)
    except Exception:
        raise NotImplementedError
    _, obs = self.task.reset()
    if len(state_type_list) > 0:
        self.observation_space = []
        for state_type in state_type_list:
            state = getattr(obs, state_type)
            if isinstance(state, (int, float)):
                shape = (1,)
            else:
                shape = state.shape
            self.observation_space.append(
                spaces.Box(low=-np.inf, high=np.inf, shape=shape))
    else:
        raise ValueError('No State Type!')
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(action_mode.action_size,),
        dtype=np.float32)
    self.state_type_list = state_type_list
def __init__(self, action_mode, static_positions=True):
    # Initialize the environment with the action mode and observations.
    # Resize the wrist camera to fit the GQCNN.
    wrist_camera = CameraConfig(image_size=(1032, 772))
    self.env = Environment(action_mode, '',
                           ObservationConfig(wrist_camera=wrist_camera), False,
                           static_positions=static_positions)
    self.env.launch()
    # Load the specified task into the environment.
    self.task = self.env.get_task(EmptyContainer)
def run_episode(self):
    self.env = Environment(self._action_mode, "", self._obs_config,
                           headless=self._headless)
    self.env.launch()
    self.task = self.env.get_task(self._task_class)
class SimulationEnvironment():
    """
    This can be a parent class from which we can have multiple child classes
    that diversify for different tasks and deeper functions within the tasks.
    """

    def __init__(self,
                 action_mode=DEFAULT_ACTION_MODE,
                 task=DEFAULT_TASK,
                 dataset_root='',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        self.env = Environment(action_mode, dataset_root,
                               obs_config=obs_config, headless=headless)
        # No need to call launch: get_task can launch the env.
        self.task = self.env.get_task(task)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,),
            dtype=np.float32)
        self.logger = logger.create_logger(__class__.__name__)
        self.logger.propagate = 0

    def _get_state(self, obs: Observation):
        """
        This is a hook over the environment to get the state, so that changes
        to attributes of `Observation` can be made in a modular way.
        """
        raise NotImplementedError()

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # Reward in original RLBench is binary (success or not).
        obs_, reward, terminate = self.task.step(action)
        return self._get_state(obs_), reward, terminate

    def shutdown(self):
        self.logger.info(
            "Environment shutdown! Create a new instance if you want to "
            "start again.")
        self.logger.handlers.pop()
        self.env.shutdown()

    def get_demos(self, num_demos):
        """
        Should be implemented by child classes, because the extra properties
        needed before training can be accessed via them.
        """
        raise NotImplementedError()
def __init__(self, task_class, observation_mode='state',
             render_mode: Union[None, str] = None):
    self._observation_mode = observation_mode
    self._render_mode = render_mode
    obs_config = ObservationConfig()
    if observation_mode == 'state':
        obs_config.set_all_high_dim(False)
        obs_config.set_all_low_dim(True)
    elif observation_mode == 'vision':
        obs_config.set_all(True)
    else:
        raise ValueError('Unrecognised observation_mode: %s.' %
                         observation_mode)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self.env = Environment(action_mode, obs_config=obs_config, headless=True)
    self.env.launch()
    self.task = self.env.get_task(task_class)
    _, obs = self.task.reset()
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(self.env.action_size,))
    if observation_mode == 'state':
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
    elif observation_mode == 'vision':
        self.observation_space = spaces.Dict({
            "state": spaces.Box(
                low=-np.inf, high=np.inf,
                shape=obs.get_low_dim_data().shape),
            "left_shoulder_rgb": spaces.Box(
                low=0, high=1, shape=obs.left_shoulder_rgb.shape),
            "right_shoulder_rgb": spaces.Box(
                low=0, high=1, shape=obs.right_shoulder_rgb.shape),
            "wrist_rgb": spaces.Box(
                low=0, high=1, shape=obs.wrist_rgb.shape),
            "front_rgb": spaces.Box(
                low=0, high=1, shape=obs.front_rgb.shape),
        })
    if render_mode is not None:
        # Add the camera to the scene.
        cam_placeholder = Dummy('cam_cinematic_placeholder')
        self._gym_cam = VisionSensor.create([640, 360])
        self._gym_cam.set_pose(cam_placeholder.get_pose())
        if render_mode == 'human':
            self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
        else:
            self._gym_cam.set_render_mode(RenderMode.OPENGL3)
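# --- Usage sketch -----------------------------------------------------------
# Assumes the enclosing class is a gym-style wrapper (here called
# RLBenchGymEnv, a hypothetical name) that also implements the usual
# reset/step/close methods, which are not shown in the snippet above.
env = RLBenchGymEnv(ReachTarget, observation_mode='vision',
                    render_mode='human')
obs = env.reset()
for _ in range(50):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()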
def sub_proc(child):
    action_mode = ActionMode()
    env = Environment(action_mode, headless=child)
    env.launch()
    task = env.get_task(CloseMicrowave)
    descriptions, obs = task.reset()
    for i in range(1000):
        obs, reward, terminate = task.step(
            np.random.normal(np.zeros(action_mode.action_size)))
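# --- Usage sketch -----------------------------------------------------------
# sub_proc runs a complete simulation loop; a hedged example of running it in
# a separate process (child=True makes the environment headless).
import multiprocessing as mp

if __name__ == '__main__':
    p = mp.Process(target=sub_proc, args=(True,))
    p.start()
    p.join()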
def get_task(self, task_class, arm_action_mode):
    obs_config = ObservationConfig()
    obs_config.set_all(False)
    obs_config.set_all_low_dim(True)
    obs_config.right_shoulder_camera.rgb = True
    action_mode = ActionMode(arm_action_mode)
    self.env = Environment(action_mode, ASSET_DIR, obs_config, headless=True)
    self.env.launch()
    return self.env.get_task(task_class)
def __init__(self, task_class, state_dim, n_features, n_cluster, n_latent,
             parameters=None, headless=False, cov_reg=1E-8, n_samples=50,
             dense_reward=False, imitation_noise=0.03):
    obs_config = ObservationConfig()
    obs_config.set_all_low_dim(True)
    obs_config.set_all_high_dim(False)
    self._obs_config = obs_config
    self._state_dim = state_dim
    self._headless = headless
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
    self._task_class = task_class
    self._action_mode = action_mode
    self.env = Environment(action_mode, "", obs_config, headless=headless)
    self.env.launch()
    self.task = self.env.get_task(task_class)
    if parameters is None:
        self.parameters = np.load(
            "parameters/%s_%d.npy" %
            (self.task.get_name(), n_features))[:n_samples]
        # parameters = np.concatenate([parameters for _ in range(20)])
    else:
        # Use the supplied parameters when given.
        self.parameters = parameters
    self.imitation_noise = imitation_noise
    self.parameters[:, :3] += imitation_noise * np.random.normal(
        size=self.parameters[:, :3].shape)
    self.mppca = MPPCA(n_cluster, n_latent, n_iterations=30,
                       cov_reg=cov_reg, n_init=500)
    self.mppca.fit(self.parameters)
    self.rlmppca = None
    self.dense_reward = dense_reward
    print(np.exp(self.mppca.log_pi))
    group = Group("rlbench", ["d%d" % i for i in range(7)] + ["gripper"])
    self.space = ClassicSpace(group, n_features=n_features)
    print("mppca learned")
def __init__(self, task_name: str, state_type='state'):
    """
    Create an RLBench environment.
    :param task_name: task names can be found in rlbench.tasks
    :param state_type: 'state', 'vision', or a sub-list of the state-types
                       list, e.g. ['left_shoulder_rgb']
    """
    if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list):
        self._state_type = state_type
    else:
        raise ValueError(
            'State type value error, your value is {}'.format(state_type))
    self._render_mode = None
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self.env = Environment(action_mode, obs_config=obs_config, headless=True)
    self.env.launch()
    try:
        self.task = self.env.get_task(
            getattr(sys.modules[__name__], task_name))
    except AttributeError:
        raise NotImplementedError(
            'Task %s is not imported in this module.' % task_name)
    _, obs = self.task.reset()
    self.spec = Spec(task_name)
    if self._state_type == 'state':
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
    elif self._state_type == 'vision':
        space_dict = OrderedDict()
        space_dict["state"] = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        for i in ["left_shoulder_rgb", "right_shoulder_rgb",
                  "wrist_rgb", "front_rgb"]:
            space_dict[i] = spaces.Box(
                low=0, high=1, shape=getattr(obs, i).shape)
        self.observation_space = spaces.Dict(space_dict)
    else:
        space_dict = OrderedDict()
        for name in self._state_type:
            if name.split('_')[-1] in ('rgb', 'depth', 'mask'):
                space_dict[name] = spaces.Box(
                    low=0, high=1, shape=getattr(obs, name).shape)
            else:
                space_dict[name] = spaces.Box(
                    low=-np.inf, high=np.inf,
                    shape=getattr(obs, name).shape)
        self.observation_space = spaces.Dict(space_dict)
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
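# --- Usage sketch -----------------------------------------------------------
# Illustrative constructions of the wrapper above; the enclosing class name
# RLBenchWrapper is hypothetical, and 'ReachTarget' must be imported into the
# module for the getattr lookup to succeed.
env_state = RLBenchWrapper('ReachTarget', state_type='state')
env_custom = RLBenchWrapper('ReachTarget',
                            state_type=['joint_positions', 'wrist_rgb'])
print(env_custom.observation_space)  # spaces.Dict with one Box per state type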
def get_env(self, task=None):
    task = task if task else self.config.env_type
    if self.use_gym:
        # NOTE: when using gym, the task has to be represented as a string.
        assert type(task) == str
        assert self.observation_mode in ['vision', 'state']
        env = gym.make(
            task,
            observation_mode=self.config.env_args['observation_mode'],
            render_mode=self.config.env_args['render_mode'])
        self.env_obj = env
    else:
        obs_config = ObservationConfig()
        if self.observation_mode == 'vision':
            obs_config.set_all(True)
        elif self.observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        else:
            obs_config_dict = {
                self.left_obv_key: obs_config.left_shoulder_camera,
                self.right_obv_key: obs_config.right_shoulder_camera,
                self.wrist_obv_key: obs_config.wrist_camera,
                self.front_obv_key: obs_config.front_camera
            }
            assert self.observation_mode in obs_config_dict.keys()
            obs_config.set_all_high_dim(False)
            obs_config_dict[self.observation_mode].set_all(True)
            obs_config.set_all_low_dim(True)
        # TODO: write code to change this from env_args.
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env_obj = Environment(action_mode, obs_config=obs_config,
                                   headless=True)
        task = task if task else ReachTarget
        if type(task) == str:
            task = task.split('-')[0]
            task = self.env_obj._string_to_task(task)
        self.env_obj.launch()
        # NOTE: `env` is referred to as `task` in the RLBench docs.
        env = self.env_obj.get_task(task)
    return env
def __init__(self,
             action_mode=DEFAULT_ACTION_MODE,
             task=DEFAULT_TASK,
             dataset_root='',
             headless=True):
    obs_config = ObservationConfig()
    obs_config.set_all(True)
    self.env = Environment(action_mode, dataset_root,
                           obs_config=obs_config, headless=headless)
    # No need to call launch: get_task can launch the env.
    self.task = self.env.get_task(task)
    _, obs = self.task.reset()
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
    self.logger = logger.create_logger(__class__.__name__)
    self.logger.propagate = 0
class RLBenchEnv():
    """Wrap RLBench so it exposes the same interface as openai.gym."""

    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode, obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError(
                'Task %s is not imported in this module.' % task_name)
        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=state.shape)

    def seed(self, seed_value):
        # Set the seed as in an openai.gym env (no-op here).
        pass

    def render(self):
        # Rendering is handled by the CoppeliaSim window itself.
        pass

    def reset(self):
        descriptions, obs = self.task.reset()
        return getattr(obs, self.state_type)

    def step(self, action):
        obs_, reward, terminate = self.task.step(action)
        return getattr(obs_, self.state_type), reward, terminate, None

    def close(self):
        self.env.shutdown()
def __init__(self, h):
    # 64x64 camera outputs.
    cam = CameraConfig(image_size=(64, 64))
    obs_config = ObservationConfig(left_shoulder_camera=cam,
                                   right_shoulder_camera=cam,
                                   wrist_camera=cam,
                                   front_camera=cam)
    obs_config.set_all(True)
    # Delta EE control with motion planning.
    action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)
    # Inits.
    self.env = Environment(action_mode, obs_config=obs_config, headless=h)
    self.env.launch()
    self.task = self.env.get_task(ReachTarget)
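# --- Usage sketch -----------------------------------------------------------
# One planned delta-pose step with the 64x64-camera setup above; the class
# name ReacherEnv is hypothetical. A DELTA_EE_POSE_PLAN_WORLD_FRAME action is
# [dx, dy, dz, qx, qy, qz, qw, gripper].
env = ReacherEnv(h=True)
descriptions, obs = env.task.reset()
# Small +x displacement, identity quaternion, gripper open.
delta = np.array([0.01, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0])
obs, reward, terminate = env.task.step(delta)
env.env.shutdown()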
def init(self, display=False):
    with suppress_stdout():
        self.env = RLEnvironment(action_mode=self._action_config,
                                 obs_config=self._observation_config,
                                 headless=not display,
                                 robot_configuration=self._robot_name)
    self.env.launch()
    self.task = self.env.get_task(
        get_named_class(self._task_name, tasks))
def __init__(self, task_class, observation_mode='state'):
    self._observation_mode = observation_mode
    obs_config = ObservationConfig()
    if observation_mode == 'state':
        obs_config.set_all_high_dim(False)
        obs_config.set_all_low_dim(True)
    elif observation_mode == 'vision':
        obs_config.set_all(True)
    else:
        raise ValueError('Unrecognised observation_mode: %s.' %
                         observation_mode)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    self.env = Environment(action_mode, obs_config=obs_config, headless=True)
    self.env.launch()
    self.task = self.env.get_task(task_class)
    _, obs = self.task.reset()
    self.action_space = spaces.Box(
        low=-1.0, high=1.0, shape=(action_mode.action_size,))
    if observation_mode == 'state':
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
    elif observation_mode == 'vision':
        self.observation_space = spaces.Dict({
            "state": spaces.Box(
                low=-np.inf, high=np.inf,
                shape=obs.get_low_dim_data().shape),
            "left_shoulder_rgb": spaces.Box(
                low=0, high=1, shape=obs.left_shoulder_rgb.shape),
            "right_shoulder_rgb": spaces.Box(
                low=0, high=1, shape=obs.right_shoulder_rgb.shape),
            "wrist_rgb": spaces.Box(
                low=0, high=1, shape=obs.wrist_rgb.shape),
        })
    self._gym_cam = None
def act(graph, model, sess):
    obs_config = ObservationConfig()
    # obs_config.set_all(True)
    # These settings turn the cameras off.
    obs_config.right_shoulder_camera.set_all(False)
    obs_config.wrist_camera.set_all(False)
    obs_config.left_shoulder_camera.set_all(False)
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    env = Environment(action_mode, obs_config=obs_config, headless=False)
    env.launch()
    task = env.get_task(ReachTarget)

    training_steps = 150
    episode_length = 60
    obs = None
    gripper = [1.0]  # keep the gripper open
    for i in range(training_steps):
        # Reset the task once per episode to get the initial observation.
        if i % episode_length == 0:
            print('Reset Episode')
            descriptions, obs = task.reset()
            print(descriptions)
        # Network inputs.
        vision = obs.front_rgb[np.newaxis, :]
        state = obs.joint_positions[np.newaxis, :]
        # model.action is not fed at inference time (feeding None would fail).
        feed_dict = {
            model.vision: vision,
            model.state: state,
        }
        run_ops = [model.predict_out]
        with graph.as_default():
            res = sess.run(run_ops, feed_dict=feed_dict)
        action = np.concatenate([res[0][0], gripper], axis=-1)
        obs, reward, terminate = task.step(action)
        if reward == 1:
            print("successful try!")
    print('Done')
    env.shutdown()
def __init__(self, task_class, state_dim, n_features, headless=True):
    super().__init__(n_features)
    self._group = Group("rlbench",
                        ["d%d" % i for i in range(7)] + ["gripper"])
    self._space = ClassicSpace(self._group, n_features)
    obs_config = ObservationConfig()
    obs_config.set_all_low_dim(True)
    obs_config.set_all_high_dim(False)
    self._obs_config = obs_config
    self._state_dim = state_dim
    self._headless = headless
    action_mode = ActionMode(ArmActionMode.ABS_JOINT_POSITION)
    self._task_class = task_class
    self._action_mode = action_mode
    self.env = Environment(action_mode, "", obs_config, headless=headless)
    self.env.launch()
    self.task = self.env.get_task(task_class)
    self._obs = None
def simulation(q):
    # See rlbench/action_modes.py for other action modes.
    withLearning = False
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    action_mode_v = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
    if withLearning:
        env = Environment(action_mode_v, '', ObservationConfig(), False)
    else:
        env = Environment(action_mode, '', ObservationConfig(), False)
    task = env.get_task(EmptyContainer)
    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()
    allPoses = obj_pose_sensor.get_poses()
    agent = RandomAgent(allPoses["large_container"],
                        allPoses["small_container0"], env)
    print(descriptions)
    obj_poses = obj_pose_sensor.get_poses()
    training_steps = 10000
    episode_length = 100
    i = 0
    rewardFunc = Reward(env, ['Shape0', 'Shape2', 'Shape4'],
                        "small_container0")
    while i < training_steps:
        # Getting noisy object poses.
        obj_poses = obj_pose_sensor.get_poses()
        mask = env._scene._cam_wrist_mask.capture_rgb()
        data = (obs.wrist_depth, obs.wrist_rgb, mask)
        if withLearning:
            if i > 0 and i % episode_length == 0:
                print(i, 'Reset Episode')
                agent.updateV()
                i += 1
                continue
            oldState, action = agent.act(obs)
            obs, _, terminate = task.step(action)
            reward = rewardFunc.evaluate(obs)
            print(reward)
            nextState = np.zeros(8)
            nextState[:7] = obs.joint_positions
            nextState[7] = obs.gripper_open > 0.5
            agent.updateQ(oldState, action, nextState, reward)
            i += 1
        else:
            q.put(data)
            nextState = agent.resetAct(obs, obj_poses)
            obs, reward, terminate = task.step(nextState)
        # if terminate:
        #     break
    env.shutdown()
def _update_info_dict(self):
    # Update the info dict.
    self._info["action mode"] = self._action_mode
    self._info["observation mode"] = self._observation_mode
    # TODO: action dim should relate to the robot, not the action mode;
    # it is fixed here temporarily.
    self._info["action dim"] = (self._action_config.action_size,)
    self._info["action low"] = -np.ones(self._info["action dim"],
                                        dtype=np.float32)
    self._info["action high"] = np.ones(self._info["action dim"],
                                        dtype=np.float32)
    if self._observation_mode == "state" or self._observation_mode == "all":
        # TODO: the observation shape should be determined without
        # initializing the entire environment.
        with suppress_stdout():
            env = RLEnvironment(action_mode=self._action_config,
                                obs_config=self._observation_config,
                                headless=True,
                                robot_configuration=self._robot_name)
            env.launch()
            task = env.get_task(get_named_class(self._task_name, tasks))
            _, obs = task.reset()
            env.shutdown()
        del task
        del env
        self._info["time step"] = _DT
        self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
        self._info["state low"] = np.ones(self._info["state dim"],
                                          dtype=np.float32) * -np.inf
        self._info["state high"] = np.ones(self._info["state dim"],
                                           dtype=np.float32) * np.inf
    if self._observation_mode == "vision" or self._observation_mode == "all":
        self._info["left shoulder rgb dim"] = tuple(
            self._observation_config.left_shoulder_camera.image_size) + (3,)
        self._info["left shoulder rgb low"] = np.zeros(
            self._info["left shoulder rgb dim"], dtype=np.float32)
        self._info["left shoulder rgb high"] = np.ones(
            self._info["left shoulder rgb dim"], dtype=np.float32)
        self._info["right shoulder rgb dim"] = tuple(
            self._observation_config.right_shoulder_camera.image_size) + (3,)
        self._info["right shoulder rgb low"] = np.zeros(
            self._info["right shoulder rgb dim"], dtype=np.float32)
        self._info["right shoulder rgb high"] = np.ones(
            self._info["right shoulder rgb dim"], dtype=np.float32)
        self._info["wrist rgb dim"] = tuple(
            self._observation_config.wrist_camera.image_size) + (3,)
        self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                               dtype=np.float32)
        self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                               dtype=np.float32)
    self._info["reward low"] = -np.inf
    self._info["reward high"] = np.inf
def job_worker_validation(worker_id, action_mode, obs_config, task_class,
                          command_q: mp.Queue, result_q: mp.Queue,
                          obs_scaling, redundancy_resolution_setup, seed):
    np.random.seed(worker_id + seed)
    # Set up the environment.
    env = Environment(action_mode=action_mode, obs_config=obs_config,
                      headless=True)
    env.launch()
    task = env.get_task(task_class)
    task.reset()
    while True:
        command = command_q.get()
        command_type = command[0]
        command_args = command[1]
        if command_type == "reset":
            task.set_variation(task.sample_variation())
            descriptions, observation = task.reset()
            if obs_scaling is not None:
                observation = observation.get_low_dim_data() / obs_scaling
            else:
                observation = observation.get_low_dim_data()
            result_q.put((descriptions, observation))
        elif command_type == "step":
            actions = command_args[0]
            # Check whether we need to resolve redundancy.
            if redundancy_resolution_setup["use_redundancy_resolution"]:
                actions[0:7], L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7],
                    setup=redundancy_resolution_setup)
            else:
                # Here we only care about the loss (for logging).
                _, L = task.resolve_redundancy_joint_velocities(
                    actions=actions[0:7],
                    setup=redundancy_resolution_setup)
            next_observation, reward, done = task.step(actions)
            if obs_scaling is not None:
                next_observation = (next_observation.get_low_dim_data() /
                                    obs_scaling)
            else:
                next_observation = next_observation.get_low_dim_data()
            result_q.put((next_observation, reward, done, L))
        elif command_type == "kill":
            print("Killing worker %d" % worker_id)
            env.shutdown()
            break
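# --- Usage sketch -----------------------------------------------------------
# Hedged wiring for the validation worker above: one command queue in, one
# result queue out, terminated with a "kill" message. action_mode, obs_config
# and ReachTarget are stand-ins for the caller's real setup.
if __name__ == '__main__':
    command_q, result_q = mp.Queue(), mp.Queue()
    worker = mp.Process(
        target=job_worker_validation,
        args=(0, action_mode, obs_config, ReachTarget, command_q, result_q,
              None, {"use_redundancy_resolution": False}, 42))
    worker.start()
    command_q.put(("reset", ()))
    descriptions, observation = result_q.get()
    command_q.put(("kill", ()))
    worker.join()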
class RLBenchEnv(object):
    """Wrap RLBench so it exposes the same interface as openai.gym."""

    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode, obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except Exception:
            raise NotImplementedError
        _, obs = self.task.reset()
        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1,)
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(action_mode.action_size,),
            dtype=np.float32)
        self.state_type_list = state_type_list

    def seed(self, seed_value):
        # Set the seed as in an openai.gym env (no-op here).
        pass

    def render(self):
        # Rendering is handled by the CoppeliaSim window itself.
        pass

    def _get_state(self, obs):
        if len(self.state_type_list) > 0:
            state = []
            for state_type in self.state_type_list:
                if state_type in image_types:
                    image = getattr(obs, state_type)
                    # Change (H, W, C) to (C, H, W) for torch.
                    image = np.moveaxis(image, 2, 0)
                    state.append(image)
                elif state_type in primitive_types:
                    state.append(np.array([getattr(obs, state_type)]))
                else:
                    state.append(getattr(obs, state_type))
        else:
            raise ValueError('State Type Not Exists!')
        return state

    def reset(self):
        descriptions, obs = self.task.reset()
        return self._get_state(obs)

    def step(self, action):
        # Reward in original RLBench is binary (success or not).
        obs, reward, terminate = self.task.step(action)

        # Shaping: distance between the current gripper pose and the target.
        # x, y, z, qx, qy, qz, qw = self.task._robot.arm.get_tip().get_pose()
        x, y, z, qx, qy, qz, qw = self.task._robot.gripper.get_pose()
        tar_x, tar_y, tar_z, _, _, _, _ = self.task._task.target.get_pose()
        distance = np.abs(x - tar_x) + np.abs(y - tar_y) + np.abs(z - tar_z)
        reward -= 1. * distance

        # Shaping: speed regulation.
        joint_velocities = self.task._robot.arm.get_joint_velocities()
        velocities = np.inner(joint_velocities, joint_velocities)
        reward -= 0.05 / len(joint_velocities) * velocities

        # Shaping: orientation of the gripper towards the target.
        gripper_x, gripper_y, gripper_z, gripper_qx, gripper_qy, gripper_qz, \
            gripper_qw = self.task._robot.gripper.get_pose()
        v1 = np.array(
            (tar_x - gripper_x, tar_y - gripper_y, tar_z - gripper_z))
        # Rotation axis of the gripper quaternion (w = cos(theta/2)).
        temp = np.sin(np.arccos(gripper_qw))
        v2 = np.array(
            (gripper_qx / temp, gripper_qy / temp, gripper_qz / temp))
        orientation_angle = np.abs(
            np.arccos(v1.dot(v2) / np.sqrt(v1.dot(v1)) /
                      np.sqrt(v2.dot(v2))))  # in [0, pi]
        if orientation_angle > np.pi / 2.:
            orientation_angle = np.pi - orientation_angle
        reward -= 0.5 / np.pi * orientation_angle

        return self._get_state(obs), reward, terminate, None

    def close(self):
        self.env.shutdown()
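# --- Usage sketch -----------------------------------------------------------
# A short rollout against the shaped-reward wrapper above. The task is
# hard-coded to ReachTarget in __init__, so task_name is only informational.
env = RLBenchEnv('ReachTarget', state_type_list=['joint_positions'],
                 headless=True)
state = env.reset()
for _ in range(200):
    # reward = sparse success reward minus distance/velocity/orientation terms
    state, reward, terminate, _ = env.step(env.action_space.sample())
    if terminate:
        state = env.reset()
env.close()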
        pose[:3] += pos
        pose[3:] = [perturbed_quat_wxyz.x, perturbed_quat_wxyz.y,
                    perturbed_quat_wxyz.z, perturbed_quat_wxyz.w]
        obj_poses[name] = pose
    return obj_poses


if __name__ == "__main__":
    # See rlbench/action_modes.py for other action modes:
    # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE)
    # action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN)
    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
    env = Environment(action_mode, '', ObservationConfig(), False, frequency=5)
    # Available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard,
    # SetTheTable.
    # task = env.get_task(StackBlocks)
    task = env.get_task(PlayJenga)
    obj_pose_sensor = NoisyObjectPoseSensor(env)
    descriptions, obs = task.reset()
    agent = Agent(obs, obj_pose_sensor.get_poses())

    while True:
        # Getting noisy object poses.
        obj_poses = obj_pose_sensor.get_poses()
        # Getting various fields from obs.
        current_joints = obs.joint_positions
def run_validation(self, model):
    """
    Runs validation for presentation purposes.
    :param model: Tensorflow model for action prediction
    """
    if self.rand_env:
        env = DomainRandomizationEnvironment(
            action_mode=self.action_mode,
            obs_config=self.obs_config,
            headless=self.headless,
            randomize_every=self.randomize_every,
            visual_randomization_config=self.visual_rand_config)
    else:
        env = Environment(action_mode=self.action_mode,
                          obs_config=self.obs_config,
                          headless=self.headless)
    env.launch()
    task = env.get_task(self.task_class)
    task.reset()
    print("Initialized Environment for validation")

    observation = None
    episode = 0
    step = 0
    done = False
    logger = CmdLineLogger(self.logging_interval, self.training_episodes, 1)
    evaluator = SuccessEvaluator()

    # Containers for validation statistics.
    reward_per_episode = 0
    dones = 0
    rewards = 0
    episode_lengths = 0

    # We can save all enabled camera inputs to the root log dir if we want.
    if self.save_camera_input:
        camcorder = Camcorder(self.root_log_dir, 0)

    while episode < self.training_episodes:
        # Reset the episode if the maximal length is reached or we are done.
        if step % self.episode_length == 0 or done:
            descriptions, observation = task.reset()
            if self.save_camera_input:
                camcorder.save(observation, task.get_robot_visuals(),
                               task.get_all_graspable_objects())
            if self.obs_scaling_vector is not None:
                observation = (observation.get_low_dim_data() /
                               self.obs_scaling_vector)
            else:
                observation = observation.get_low_dim_data()
            logger(episode, evaluator.successful_episodes,
                   evaluator.successful_episodes_perc)
            if step != 0:
                rewards += reward_per_episode
                episode_lengths += step
            step = 0
            reward_per_episode = 0
            episode += 1

        # Predict the action.
        actions = np.squeeze(model.predict(tf.constant([observation])))

        # Check whether we need to resolve redundancy.
        if self.use_redundancy_resolution:
            actions[0:7], _ = task.resolve_redundancy_joint_velocities(
                actions=actions[0:7],
                setup=self.redundancy_resolution_setup)

        # Check whether we need to keep the gripper open.
        if self.keep_gripper_open:
            actions[7] = 1

        # Increment the simulation.
        next_observation, reward, done = task.step(
            actions, camcorder if self.save_camera_input else None)
        if self.obs_scaling_vector is not None:
            next_observation = (next_observation.get_low_dim_data() /
                                self.obs_scaling_vector)
        else:
            next_observation = next_observation.get_low_dim_data()
        evaluator.add(episode, done)
        observation = next_observation
        step += 1
        reward_per_episode += reward
        dones += done

    env.shutdown()
    print("\nDone.\n")
    return (rewards / self.episode_length,
            dones / self.training_episodes,
            episode_lengths / self.training_episodes)
def __init__(self, task_class, observation_mode='state',
             action_mode='joint', multi_action_space=False,
             discrete_action_space=False):
    self.num_skip_control = 20
    self.epi_obs = []
    self.obs_record = True
    self.obs_record_id = None
    self._observation_mode = observation_mode
    self.obs_config = ObservationConfig()
    if observation_mode == 'state':
        self.obs_config.set_all_high_dim(False)
        self.obs_config.set_all_low_dim(True)
    elif observation_mode == 'state-goal':
        self.obs_config.set_all_high_dim(False)
        self.obs_config.set_all_low_dim(True)
        self.obs_config.set_goal_info(True)
    elif observation_mode == 'vision':
        self.obs_config.set_all(False)
        self.obs_config.set_camera_rgb(True)
    elif observation_mode == 'both':
        self.obs_config.set_all(True)
    else:
        raise ValueError('Unrecognised observation_mode: %s.' %
                         observation_mode)
    self._action_mode = action_mode
    self.ac_config = None
    if action_mode == 'joint':
        self.ac_config = ActionConfig(
            SnakeRobotActionConfig.ABS_JOINT_POSITION)
    elif action_mode == 'trigon':
        self.ac_config = ActionConfig(
            SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
            is_discrete=discrete_action_space)
    else:
        raise ValueError('Unrecognised action_mode: %s.' % action_mode)
    self.env = Environment(action_config=self.ac_config,
                           obs_config=self.obs_config,
                           headless=True)
    self.env.launch()
    self.task = self.env.get_task(task_class)
    self.max_episode_steps = self.task.episode_len
    _, obs = self.task.reset()
    if action_mode == 'joint':
        self.action_space = spaces.Box(
            low=-1.7, high=1.7, shape=(self.ac_config.action_size,),
            dtype=np.float32)
    elif action_mode == 'trigon':
        if multi_action_space:
            # action_space1 = spaces.MultiBinary(n=1)
            low1 = np.array([-0.8, -0.8])
            high1 = np.array([0.8, 0.8])
            action_space1 = spaces.Box(low=low1, high=high1, dtype=np.float32)
            # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
            # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
            low2 = np.array([1.0])
            high2 = np.array([3.0])
            action_space2 = spaces.Box(low=low2, high=high2, dtype=np.float32)
            self.action_space = spaces.Tuple((action_space1, action_space2))
        elif discrete_action_space:
            self.action_space = spaces.MultiDiscrete([4, 4, 4])
        else:
            # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
            # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
            low = np.array([-1, -1, -1])
            high = np.array([1, 1, 1])
            self.action_space = spaces.Box(low=low, high=high,
                                           dtype=np.float32)
    if observation_mode == 'state' or observation_mode == 'state-goal':
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(obs.get_low_dim_data().shape[0] * self.num_skip_control,))
        if observation_mode == 'state-goal':
            self.goal_dim = obs.get_goal_dim()
    elif observation_mode == 'vision':
        self.observation_space = spaces.Box(
            low=0, high=1, shape=obs.head_camera_rgb.shape)
    elif observation_mode == 'both':
        self.observation_space = spaces.Dict({
            "state": spaces.Box(low=-np.inf, high=np.inf,
                                shape=obs.get_low_dim_data().shape),
            "rattler_eye_rgb": spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape),
            "rattler_eye_depth": spaces.Box(
                low=0, high=1, shape=obs.head_camera_depth.shape),
        })
    self._gym_cam = None
class RLBenchEnv(gym.Env):
    """A gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self, task_class, observation_mode='state',
                 action_mode='joint', multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None
        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)
        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()
        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7, high=1.7, shape=(self.ac_config.action_size,),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1, high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2, high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low, high=high,
                                               dtype=np.float32)
        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control,))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state": spaces.Box(low=-np.inf, high=np.inf,
                                    shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb": spaces.Box(
                    low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth": spaces.Box(
                    low=0, high=1, shape=obs.head_camera_depth.shape),
            })
        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                "rattler_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # Add the camera to the scene.
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {'observation': [], 'desired_goal': None,
                         'achieved_goal': None}
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        for _ in range(self.num_skip_control):
            if isinstance(obs_data, (list, np.ndarray)):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used.
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {'observation': [], 'desired_goal': None,
                         'achieved_goal': None}
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, (list, np.ndarray)):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                # Used for hierarchical reinforcement learning algorithms.
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:
                    # Record a successful experience.
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None,
                       info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = ('./obs_record/obs_record_' +
                    str(self.obs_record_id) + '.txt')
        obs_record_file = open(filename, 'w')
        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            # The last line records the target position.
            robot_pos_arr.append(target_pos)
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()