Example #1
    def initialize(self, headless=False):
        DATASET = ''
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        obs_config.left_shoulder_camera.rgb = True
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN)
        self.env = Environment(action_mode,
                               DATASET,
                               obs_config,
                               headless=headless)
        self.sensor = NoisyObjectPoseSensor(self.env)
        self.env.launch()
        self.task = self.env.get_task(EmptyContainer)
        self.task.reset()
        self.home = self.get_objects(False)['large_container'].get_pose()
        self.home[2] += 0.3
        # demos = task.get_demos(3, live_demos=live_demos)

        self.objs = self.get_objects()
        self.movable_objects = ['Shape', 'Shape1', 'Shape3']
        self.target_bins = ['small_container0']
        self.start_bins = ['large_container']
        self.max_retry = 5
        self.env._robot.gripper.set_joint_forces([50, 50])
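
A minimal usage sketch for the example above. The enclosing class name (`GraspEnv` here) and the 8-dim ABS_EE_POSE_PLAN action layout [x, y, z, qx, qy, qz, qw, gripper] are assumptions, not part of the original:

# Sketch only: drive the arm to the hover pose above the large container.
env = GraspEnv()              # hypothetical class owning initialize()
env.initialize(headless=True)
action = np.concatenate([env.home, [1.0]])  # 7-DoF target pose + open gripper
obs, reward, terminate = env.task.step(action)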
Example #2
    def __init__(self, task_name, state_type='left_shoulder_rgb'):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=False)
        self.env.launch()
        try:
            self.task = self.env.get_task(
                getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError(
                'Unknown task name: %s' % task_name)

        _, obs = self.task.reset()
        state = getattr(obs, state_type)
        self.state_type = state_type
        self.spec = Spec(task_name)

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=state.shape)
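
A short rollout sketch against the wrapper above (`RLBenchWrapper` is an assumed name for the enclosing class; in this RLBench version `task.step` returns an (Observation, reward, terminate) triple):

# Sketch only: random actions sampled from the Box space defined above.
wrapper = RLBenchWrapper('ReachTarget')
for _ in range(10):
    action = wrapper.action_space.sample()
    obs, reward, terminate = wrapper.task.step(action)
    state = getattr(obs, wrapper.state_type)
    if terminate:
        break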
Example #3
    def test_run_task_validator(self):
        for task_file in TASKS:
            test_name = task_file.split('.py')[0]
            with self.subTest(task=test_name):
                if test_name in FLAKY_TASKS:
                    self.skipTest('Flaky task.')
                sim = PyRep()
                ttt_file = os.path.join(
                    DIR_PATH, '..', '..', 'rlbench', TTT_FILE)
                sim.launch(ttt_file, headless=True)
                sim.step_ui()
                sim.set_simulation_timestep(50.0)
                sim.step_ui()
                sim.start()
                robot = Robot(Panda(), PandaGripper())
                obs = ObservationConfig()
                obs.set_all(False)
                scene = Scene(sim, robot, obs)
                sim.start()
                task_class = task_file_to_task_class(task_file)
                active_task = task_class(sim, robot)
                try:
                    task_smoke(active_task, scene, variation=-1,
                               max_variations=2, success=0.25)
                except Exception as e:
                    sim.stop()
                    sim.shutdown()
                    raise e
                sim.stop()
                sim.shutdown()
Example #4
    def __init__(self,
                 task_name,
                 state_type_list=['left_shoulder_rgb'],
                 dataset_root='',
                 observation_mode='state',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self._observation_mode = observation_mode
        self.env = Environment(
            action_mode, dataset_root, obs_config=obs_config, headless=headless)
        # launch() is optional here: task.get_task can launch the env itself.
        self.env.launch()
        self.task = self.env.get_task(task_name)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
#         self.logger = logger.create_logger(__class__.__name__)
#         self.logger.propagate = 0
        if len(state_type_list) > 0:
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        else:
            raise ValueError('No State Type!')

        self.state_type_list = state_type_list
Example #5
    def __init__(self, task_name, state_type_list=None, headless=False):
        if state_type_list is None:
            state_type_list = ['left_shoulder_rgb']
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=headless)
        self.env.launch()
        try:
            self.task = self.env.get_task(ReachTarget)
        except Exception:
            raise NotImplementedError

        _, obs = self.task.reset()

        if len(state_type_list) > 0:
            self.observation_space = []
            for state_type in state_type_list:
                state = getattr(obs, state_type)
                if isinstance(state, (int, float)):
                    shape = (1, )
                else:
                    shape = state.shape
                self.observation_space.append(
                    spaces.Box(low=-np.inf, high=np.inf, shape=shape))
        else:
            raise ValueError('No State Type!')
        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ),
                                       dtype=np.float32)

        self.state_type_list = state_type_list
Example #6
    def __init__(self,
                 task_class,
                 observation_mode='state',
                 render_mode: Union[None, str] = None):
        self._observation_mode = observation_mode
        self._render_mode = render_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(self.env.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
                "front_rgb":
                spaces.Box(low=0, high=1, shape=obs.front_rgb.shape),
            })

        if render_mode is not None:
            # Add the camera to the scene
            cam_placeholder = Dummy('cam_cinematic_placeholder')
            self._gym_cam = VisionSensor.create([640, 360])
            self._gym_cam.set_pose(cam_placeholder.get_pose())
            if render_mode == 'human':
                self._gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
            else:
                self._gym_cam.set_render_mode(RenderMode.OPENGL3)
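
For reference, the wrapper above could be instantiated like this (a sketch; `RLBenchGymEnv` is an assumed name for the enclosing class, and the default RLBench camera resolution of 128x128 is assumed):

# Sketch only.
env = RLBenchGymEnv(ReachTarget, observation_mode='vision',
                    render_mode='human')
print(env.action_space.shape)                    # (action_size,)
print(env.observation_space['wrist_rgb'].shape)  # (128, 128, 3) by default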
Example #7
    def get_task(self, task_class, arm_action_mode):
        obs_config = ObservationConfig()
        obs_config.set_all(False)
        obs_config.set_all_low_dim(True)
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode(arm_action_mode)
        self.env = Environment(
            action_mode, ASSET_DIR, obs_config, headless=True)
        self.env.launch()
        return self.env.get_task(task_class)
Example #8
    def get_task(self, randomize_every, frequency, visual_config=None,
                 dynamics_config=None):
        cam_config = CameraConfig(render_mode=RenderMode.OPENGL)
        obs_config = ObservationConfig(right_shoulder_camera=cam_config)
        obs_config.set_all(False)
        obs_config.set_all_low_dim(True)
        obs_config.right_shoulder_camera.rgb = True
        mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = DomainRandomizationEnvironment(
            mode, ASSET_DIR, obs_config, True, True,
            randomize_every, frequency, visual_config, dynamics_config)
        self.env.launch()
        return self.env.get_task(OpenMicrowave)
Example #9
    def test_get_stored_demos_images_without_init_sim(self):
        obs_config = ObservationConfig()
        obs_config.set_all(False)
        obs_config.set_all_low_dim(True)
        obs_config.right_shoulder_camera.rgb = True
        action_mode = ActionMode()
        self.env = environment.Environment(
            action_mode, ASSET_DIR, obs_config, headless=True)
        demos = self.env.get_demos('reach_target', 2)
        self.assertEqual(len(demos), 2)
        self.assertGreater(len(demos[0]), 0)
        self.assertIsInstance(demos[0][0].right_shoulder_rgb, np.ndarray)
        self.assertIsNone(demos[0][0].left_shoulder_rgb)
Example #10
    def __init__(self, task_name: str, state_type='state'):
        # render_mode=None):
        """
        create RL Bench environment
        :param task_name: task names can be found in rlbench.tasks
        :param state_type: state or vision or a sub list of state_types list like ['left_shoulder_rgb']
        """
        if state_type == 'state' or state_type == 'vision' or isinstance(state_type, list):
            self._state_type = state_type
        else:
            raise ValueError('State type value error, your value is {}'.format(state_type))
        # self._render_mode = render_mode
        self._render_mode = None
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(
            action_mode, obs_config=obs_config, headless=True)
        self.env.launch()
        try:
            self.task = self.env.get_task(getattr(sys.modules[__name__], task_name))
        except AttributeError:
            raise NotImplementedError(
                'Unknown task name: %s' % task_name)

        _, obs = self.task.reset()
        self.spec = Spec(task_name)

        if self._state_type == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif self._state_type == 'vision':
            space_dict = OrderedDict()
            space_dict["state"] = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
            for i in ["left_shoulder_rgb", "right_shoulder_rgb", "wrist_rgb", "front_rgb"]:
                space_dict[i] = spaces.Box(
                    low=0, high=1, shape=getattr(obs, i).shape)
            self.observation_space = spaces.Dict(space_dict)
        else:
            space_dict = OrderedDict()
            for name in self._state_type:
                if name.split('_')[-1] in ('rgb', 'depth', 'mask'):
                    space_dict[name] = spaces.Box(
                        low=0, high=1, shape=getattr(obs, name).shape)
                else:
                    space_dict[name] = spaces.Box(
                        low=-np.inf, high=np.inf,
                        shape=getattr(obs, name).shape)
            self.observation_space = spaces.Dict(space_dict)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
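
The snippet does not show a matching `_extract_obs`; one consistent with the three `observation_space` branches above might look like this (a sketch, not from the original source):

    def _extract_obs(self, obs):
        # Mirror the three observation_space branches above (sketch only).
        if self._state_type == 'state':
            return obs.get_low_dim_data()
        elif self._state_type == 'vision':
            ret = OrderedDict(state=obs.get_low_dim_data())
            for name in ["left_shoulder_rgb", "right_shoulder_rgb",
                         "wrist_rgb", "front_rgb"]:
                ret[name] = getattr(obs, name)
            return ret
        else:
            return OrderedDict(
                (name, getattr(obs, name)) for name in self._state_type)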
Example #11
    def get_env(self, task=None):
        task = task if task else self.config.env_type
        if self.use_gym:
            # NOTE: when using gym, the task must be given as a string.
            assert type(task) == str
            assert self.observation_mode in ['vision', 'state']

            env = gym.make(
                task,
                observation_mode=self.config.env_args['observation_mode'],
                render_mode=self.config.env_args['render_mode'])
            self.env_obj = env
        else:
            obs_config = ObservationConfig()
            if self.observation_mode == 'vision':
                obs_config.set_all(True)
            elif self.observation_mode == 'state':
                obs_config.set_all_high_dim(False)
                obs_config.set_all_low_dim(True)
            else:
                obs_config_dict = {
                    self.left_obv_key: obs_config.left_shoulder_camera,
                    self.right_obv_key: obs_config.right_shoulder_camera,
                    self.wrist_obv_key: obs_config.wrist_camera,
                    self.front_obv_key: obs_config.front_camera
                }

                assert self.observation_mode in obs_config_dict.keys()

                obs_config.set_all_high_dim(False)
                obs_config_dict[self.observation_mode].set_all(True)
                obs_config.set_all_low_dim(True)

            # TODO : Write code to change it from env_args
            action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
            self.env_obj = Environment(action_mode,
                                       obs_config=obs_config,
                                       headless=True)

            task = task if task else ReachTarget
            if type(task) == str:
                task = task.split('-')[0]
                task = self.env_obj._string_to_task(task)

            self.env_obj.launch()
            env = self.env_obj.get_task(
                task)  # NOTE: `env` is referred to as `task` in the RLBench docs.
        return env
Example #12
    def __init__(self,
                 action_mode=DEFAULT_ACTION_MODE,
                 task=DEFAULT_TASK,
                 dataset_root='',
                 headless=True):
        obs_config = ObservationConfig()
        obs_config.set_all(True)
        self.env = Environment(
            action_mode, dataset_root, obs_config=obs_config, headless=headless)
        # No explicit launch() needed: task.get_task can launch the env.
        self.task = self.env.get_task(task)
        _, obs = self.task.reset()
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.env.action_size,), dtype=np.float32)
        self.logger = logger.create_logger(__class__.__name__)
        self.logger.propagate = 0
Example #13
    def __init__(self, h):

        # 64x64 camera outputs
        cam = CameraConfig(image_size=(64, 64))
        obs_config = ObservationConfig(left_shoulder_camera=cam,
                                       right_shoulder_camera=cam,
                                       wrist_camera=cam,
                                       front_camera=cam)
        obs_config.set_all(True)

        # delta EE control with motion planning
        action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE_PLAN_WORLD_FRAME)

        # Init
        self.env = Environment(action_mode, obs_config=obs_config, headless=h)
        self.env.launch()
        self.task = self.env.get_task(ReachTarget)
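
With DELTA_EE_POSE_PLAN_WORLD_FRAME each action is a pose delta in the world frame. A sketch of one small Cartesian nudge (the [dx, dy, dz, qx, qy, qz, qw, gripper] layout and the variable names are assumptions):

# Sketch only: raise the end effector by 1 cm, keeping its orientation.
reacher = Reacher(h=True)  # hypothetical name for the class above
delta = np.array([0.0, 0.0, 0.01,      # translation delta (metres)
                  0.0, 0.0, 0.0, 1.0,  # identity quaternion: no rotation
                  1.0])                # gripper open
obs, reward, terminate = reacher.task.step(delta)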
Example #14
    def __init__(self, task_class, observation_mode='state'):
        self._observation_mode = observation_mode
        obs_config = ObservationConfig()
        if observation_mode == 'state':
            obs_config.set_all_high_dim(False)
            obs_config.set_all_low_dim(True)
        elif observation_mode == 'vision':
            obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)
        action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
        self.env = Environment(action_mode,
                               obs_config=obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)

        _, obs = self.task.reset()

        self.action_space = spaces.Box(low=-1.0,
                                       high=1.0,
                                       shape=(action_mode.action_size, ))

        if observation_mode == 'state':
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.get_low_dim_data().shape)
        elif observation_mode == 'vision':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "left_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.left_shoulder_rgb.shape),
                "right_shoulder_rgb":
                spaces.Box(low=0, high=1, shape=obs.right_shoulder_rgb.shape),
                "wrist_rgb":
                spaces.Box(low=0, high=1, shape=obs.wrist_rgb.shape),
            })

        self._gym_cam = None
Example #15
def main(argv):

    obs_config = ObservationConfig(record_gripper_closing=True)
    obs_config.set_all(False)

    vrc = rand_every = None
    frequency = 0
    if FLAGS.domain_randomization:
        vrc = VisualRandomizationConfig(FLAGS.textures_path)
        rand_every = RandomizeEvery.TRANSITION
        frequency = 10

    env = Environment(ActionMode(), obs_config=obs_config,
                      randomize_every=rand_every, frequency=frequency,
                      visual_randomization_config=vrc, headless=FLAGS.headless)
    env.launch()

    # Add the camera to the scene
    cam_placeholder = Dummy('cam_cinematic_placeholder')
    cam = VisionSensor.create(FLAGS.camera_resolution)
    cam.set_pose(cam_placeholder.get_pose())
    cam.set_parent(cam_placeholder)

    cam_motion = CircleCameraMotion(cam, Dummy('cam_cinematic_base'), 0.005)
    tr = TaskRecorder(env, cam_motion, fps=30)

    if len(FLAGS.tasks) > 0:
        task_names = FLAGS.tasks
    else:
        task_names = [t.split('.py')[0] for t in os.listdir(TASKS_PATH)
                      if t != '__init__.py' and t.endswith('.py')]
    task_classes = [task_file_to_task_class(
        task_file) for task_file in task_names]

    for i, (name, cls) in enumerate(zip(task_names, task_classes)):
        good = tr.record_task(cls)
        if FLAGS.individual and good:
            tr.save(os.path.join(FLAGS.save_dir, '%s.avi' % name))

    if not FLAGS.individual:
        tr.save(os.path.join(FLAGS.save_dir, 'recorded_tasks.avi'))
    env.shutdown()
Example #16
    args = parser.parse_args()

    python_file = os.path.join(TASKS_PATH, args.task)
    if not os.path.isfile(python_file):
        raise RuntimeError('Could not find the task file: %s' % python_file)

    task_class = task_file_to_task_class(args.task)

    DIR_PATH = os.path.dirname(os.path.abspath(__file__))
    sim = PyRep()
    ttt_file = os.path.join(DIR_PATH, '..', 'rlbench', TTT_FILE)
    sim.launch(ttt_file, headless=True)
    sim.step_ui()
    sim.set_simulation_timestep(0.005)
    sim.step_ui()
    sim.start()

    robot = Robot(Panda(), PandaGripper())

    active_task = task_class(sim, robot)
    obs = ObservationConfig()
    obs.set_all(False)
    scene = Scene(sim, robot, obs)
    try:
        task_smoke(active_task, scene, variation=2)
    except TaskValidationError as e:
        sim.shutdown()
        raise e
    sim.shutdown()
    print('Validation successful!')
Example #17
class RLBenchEnv(gym.Env):
    """An gym wrapper for RLBench."""

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 task_class,
                 observation_mode='state',
                 action_mode='joint',
                 multi_action_space=False,
                 discrete_action_space=False):
        self.num_skip_control = 20
        self.epi_obs = []
        self.obs_record = True
        self.obs_record_id = None

        self._observation_mode = observation_mode
        self.obs_config = ObservationConfig()
        if observation_mode == 'state':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
        elif observation_mode == 'state-goal':
            self.obs_config.set_all_high_dim(False)
            self.obs_config.set_all_low_dim(True)
            self.obs_config.set_goal_info(True)
        elif observation_mode == 'vision':
            self.obs_config.set_all(False)
            self.obs_config.set_camera_rgb(True)
        elif observation_mode == 'both':
            self.obs_config.set_all(True)
        else:
            raise ValueError('Unrecognised observation_mode: %s.' %
                             observation_mode)

        self._action_mode = action_mode
        self.ac_config = None
        if action_mode == 'joint':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.ABS_JOINT_POSITION)
        elif action_mode == 'trigon':
            self.ac_config = ActionConfig(
                SnakeRobotActionConfig.TRIGON_MODEL_PARAM,
                is_discrete=discrete_action_space)
        else:
            raise ValueError('Unrecognised action_mode: %s.' % action_mode)

        self.env = Environment(action_config=self.ac_config,
                               obs_config=self.obs_config,
                               headless=True)
        self.env.launch()
        self.task = self.env.get_task(task_class)
        self.max_episode_steps = self.task.episode_len
        _, obs = self.task.reset()

        if action_mode == 'joint':
            self.action_space = spaces.Box(
                low=-1.7,
                high=1.7,
                shape=(self.ac_config.action_size, ),
                dtype=np.float32)
        elif action_mode == 'trigon':
            if multi_action_space:
                # action_space1 = spaces.MultiBinary(n=1)
                low1 = np.array([-0.8, -0.8])
                high1 = np.array([0.8, 0.8])
                action_space1 = spaces.Box(low=low1,
                                           high=high1,
                                           dtype=np.float32)
                # low = np.array([-0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low2 = np.array([1.0])
                high2 = np.array([3.0])
                action_space2 = spaces.Box(low=low2,
                                           high=high2,
                                           dtype=np.float32)
                self.action_space = spaces.Tuple(
                    (action_space1, action_space2))
            elif discrete_action_space:
                self.action_space = spaces.MultiDiscrete([4, 4, 4])
            else:
                # low = np.array([0.0, -0.8, -0.8, 1.0, 3.0, -50, -10, -0.1, -0.1])
                # high = np.array([1.0, 0.8, 0.8, 3.0, 5.0, 50, 10, 0.1, 0.1])
                low = np.array([-1, -1, -1])
                high = np.array([1, 1, 1])
                self.action_space = spaces.Box(low=low,
                                               high=high,
                                               dtype=np.float32)

        if observation_mode == 'state' or observation_mode == 'state-goal':
            self.observation_space = spaces.Box(
                low=-np.inf,
                high=np.inf,
                shape=(obs.get_low_dim_data().shape[0] *
                       self.num_skip_control, ))
            if observation_mode == 'state-goal':
                self.goal_dim = obs.get_goal_dim()
        elif observation_mode == 'vision':
            self.observation_space = spaces.Box(
                low=0, high=1, shape=obs.head_camera_rgb.shape)
        elif observation_mode == 'both':
            self.observation_space = spaces.Dict({
                "state":
                spaces.Box(low=-np.inf,
                           high=np.inf,
                           shape=obs.get_low_dim_data().shape),
                "rattler_eye_rgb":
                spaces.Box(low=0, high=1, shape=obs.head_camera_rgb.shape),
                "rattler_eye_depth":
                spaces.Box(low=0, high=1, shape=obs.head_camera_depth.shape),
            })

        self._gym_cam = None

    def _extract_obs(self, obs):
        if self._observation_mode == 'state':
            return obs.get_low_dim_data()
        elif self._observation_mode == 'state-goal':
            obs_goal = {'observation': obs.get_low_dim_data()}
            obs_goal.update(obs.get_goal_data())
            return obs_goal
        elif self._observation_mode == 'vision':
            return obs.head_camera_rgb
        elif self._observation_mode == 'both':
            return {
                "state": obs.get_low_dim_data(),
                "rattler_eye_rgb": obs.head_camera_rgb,
                "rattle_eye_depth": obs.head_camera_depth,
            }

    def render(self, mode='human'):
        if self._gym_cam is None:
            # # Add the camera to the scene
            self._gym_cam = VisionSensor('monitor')
            self._gym_cam.set_resolution([640, 640])
            self._gym_cam.set_render_mode(RenderMode.EXTERNAL_WINDOWED)

    def reset(self):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        descriptions, obs = self.task.reset()
        self.epi_obs.append(obs)
        obs_data = self._extract_obs(obs)
        for _ in range(self.num_skip_control):
            # obs_data_group.extend(obs_data)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(obs_data, dict):
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        del descriptions  # Not used
        return ret_obs

    def step(self, action):
        obs_data_group = []
        obs_data_dict = {
            'observation': [],
            'desired_goal': None,
            'achieved_goal': None
        }
        reward_group = []
        terminate = False
        for _ in range(self.num_skip_control):
            obs, reward, step_terminate, success = self.task.step(action)
            self.epi_obs.append(obs)
            obs_data = self._extract_obs(obs)
            if isinstance(obs_data, list) or isinstance(obs_data, np.ndarray):
                obs_data_group.extend(obs_data)
            elif isinstance(
                    obs_data,
                    dict):  # used for hierarchical reinforcement algorithm
                obs_data_dict['observation'].extend(obs_data['observation'])
                obs_data_dict['desired_goal'] = obs_data['desired_goal']
                obs_data_dict['achieved_goal'] = obs_data['achieved_goal']
            reward_group.append(reward)
            terminate |= step_terminate
            if terminate:
                if self.obs_record and success:  # record a successful experience
                    self.record_obs("RobotPos")
                self.epi_obs = []
                break
        ret_obs = obs_data_group if len(obs_data_group) else obs_data_dict
        return ret_obs, np.mean(reward_group), terminate, {}

    def close(self):
        self.env.shutdown()

    # def load_env_param(self):
    #     self.env.load_env_param()

    def compute_reward(self, achieved_goal=None, desired_goal=None, info=None):
        assert achieved_goal is not None
        assert desired_goal is not None
        return self.task.compute_reward(achieved_goal, desired_goal)

    def record_obs(self, obs_part):
        if self.obs_record_id is None:
            record_filenames = glob.glob("./obs_record/obs_record_*.txt")
            record_filenames.sort(key=lambda filename: int(
                filename.split('_')[-1].split('.')[0]))
            if len(record_filenames) == 0:
                self.obs_record_id = 1
            else:
                last_id = int(
                    record_filenames[-1].split('_')[-1].split('.')[0])
                self.obs_record_id = last_id + 1
        else:
            self.obs_record_id += 1
        filename = './obs_record/obs_record_' + str(
            self.obs_record_id) + '.txt'
        obs_record_file = open(filename, 'w')

        if obs_part == 'All':
            pass
        if obs_part == 'RobotPos':
            robot_pos_arr = []
            for obs in self.epi_obs:
                robot_pos = obs.get_2d_robot_pos()
                robot_pos_arr.append(robot_pos)
            target_pos = self.task.get_goal()
            robot_pos_arr.append(
                target_pos)  # The last line records the target position
            robot_pos_arr = np.array(robot_pos_arr)
            np.savetxt(obs_record_file, robot_pos_arr, fmt="%f")
        obs_record_file.close()
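
A short rollout sketch for the wrapper above (`SnakeReach` is a placeholder for whatever snake-robot task class the surrounding project defines):

# Sketch only.
env = RLBenchEnv(SnakeReach, observation_mode='state', action_mode='joint')
obs = env.reset()
for _ in range(env.max_episode_steps):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break
env.close()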
Example #18
from rlbench.environment import Environment
from rlbench.action_modes import ArmActionMode, ActionMode
from rlbench.observation_config import ObservationConfig
from rlbench.tasks import ReachTarget
import numpy as np


class Agent(object):
    def __init__(self, action_size):
        self.action_size = action_size

    def act(self, obs):
        return (np.random.normal(0.0, 0.1, size=(self.action_size, ))).tolist()


obs_config = ObservationConfig()
obs_config.set_all(False)
obs_config.left_shoulder_camera.rgb = True
obs_config.right_shoulder_camera.rgb = True
obs_config.wrist_camera.rgb = True

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode,
                  obs_config=obs_config,
                  headless=False,
                  robot_configuration='jaco')
env.launch()

task = env.get_task(ReachTarget)

agent = Agent(action_size=7)  # Jaco is 6DoF + 1 for gripper action

Example #19
class ImitationLearning(object):
    def predict_action(self, batch):
        return np.random.uniform(size=(len(batch), 7))

    def behaviour_cloning_loss(self, ground_truth_actions, predicted_actions):
        return 1


# To use 'saved' demos, set the path below, and set live_demos=False
live_demos = True
DATASET = '' if live_demos else 'PATH/TO/YOUR/DATASET'

obs_config = ObservationConfig()
obs_config.set_all(True)

action_mode = ActionMode(ArmActionMode.ABS_JOINT_VELOCITY)
env = Environment(action_mode, DATASET, obs_config, False)
env.launch()

task = env.get_task(ReachTarget)

il = ImitationLearning()

demos = task.get_demos(2, live_demos=live_demos)  # -> List[List[Observation]]
demos = np.array(demos).flatten()

# An example of using the demos to 'train' using behaviour cloning loss.
for i in range(100):
    print("'training' iteration %d" % i)
Example #20
class FakeRLBenchEnv(Environment):

    ROBOT_NAME = SUPPORTED_ROBOTS.keys()
    OBSERVATION_MODE = ("state", "vision", "all")
    ACTION_MODE = {
        "joint velocity": ArmActionMode.ABS_JOINT_VELOCITY,
        "delta joint velocity": ArmActionMode.DELTA_JOINT_VELOCITY,
        "joint position": ArmActionMode.ABS_JOINT_POSITION,
        "delta joint position": ArmActionMode.DELTA_JOINT_POSITION,
        "joint torque": ArmActionMode.ABS_JOINT_TORQUE,
        "delta joint torque": ArmActionMode.DELTA_JOINT_TORQUE,
        "effector velocity": ArmActionMode.ABS_EE_VELOCITY,
        "delta effector velocity": ArmActionMode.DELTA_EE_VELOCITY,
        "effector position": ArmActionMode.ABS_EE_POSE,
        "delta effector position": ArmActionMode.DELTA_EE_POSE
    }

    def __init__(self,
                 task_name: str,
                 observation_mode: str = "state",
                 action_mode: str = "delta joint position",
                 robot_name: str = "panda"):
        super(FakeRLBenchEnv, self).__init__(task_name)
        if task_name not in all_class_names(tasks):
            raise KeyError(f"Error: unknown task name {task_name}")
        if observation_mode not in FakeRLBenchEnv.OBSERVATION_MODE:
            raise KeyError(
                f"Error: unknown observation mode {observation_mode}, available: {FakeRLBenchEnv.OBSERVATION_MODE}"
            )
        if action_mode not in FakeRLBenchEnv.ACTION_MODE:
            raise KeyError(
                f"Error: unknown action mode {action_mode}, available: {FakeRLBenchEnv.ACTION_MODE.keys()}"
            )
        if robot_name not in FakeRLBenchEnv.ROBOT_NAME:
            raise KeyError(
                f"Error: unknown robot name {robot_name}, available: {FakeRLBenchEnv.ROBOT_NAME}"
            )

        # TODO: modify the task/robot/arm/gripper to support early instantiation before v-rep launched
        self._observation_mode = observation_mode
        self._action_mode = action_mode
        self._task_name = task_name
        self._robot_name = robot_name

        self._observation_config = ObservationConfig()
        if self._observation_mode == "state":
            self._observation_config.set_all_low_dim(True)
            self._observation_config.set_all_high_dim(False)
        elif self._observation_mode == "vision":
            self._observation_config.set_all_low_dim(False)
            self._observation_config.set_all_high_dim(True)
        elif self._observation_mode == "all":
            self._observation_config.set_all(True)

        self._action_config = ActionMode(
            FakeRLBenchEnv.ACTION_MODE[self._action_mode])

        self.env = None
        self.task = None

        self._update_info_dict()

    def init(self, display=False):
        with suppress_stdout():
            self.env = RLEnvironment(action_mode=self._action_config,
                                     obs_config=self._observation_config,
                                     headless=not display,
                                     robot_configuration=self._robot_name)
            self.env.launch()
            self.task = self.env.get_task(
                get_named_class(self._task_name, tasks))

    def reset(self, random: bool = True) -> StepDict:
        if not random:
            np.random.seed(0)
        self.task._static_positions = not random
        descriptions, obs = self.task.reset()
        # Returns a list of descriptions and the first observation
        next_step = {"opt": descriptions}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return next_step

    def step(self, last_step: StepDict) -> (StepDict, StepDict, bool):
        assert 'a' in last_step, "Key 'a' for action not in last_step; maybe you passed the wrong dict?"

        obs, reward, terminate = self.task.step(last_step['a'])
        last_step['r'] = reward
        last_step["info"] = {}
        next_step = {"opt": None}

        if self._observation_mode == "state" or self._observation_mode == "all":
            next_step['s'] = obs.get_low_dim_data()
        if self._observation_mode == "vision" or self._observation_mode == "all":
            next_step["left shoulder rgb"] = obs.left_shoulder_rgb
            next_step["right_shoulder_rgb"] = obs.right_shoulder_rgb
            next_step["wrist_rgb"] = obs.wrist_rgb
        return last_step, next_step, terminate

    def finalize(self) -> bool:
        with suppress_stdout():
            self.env.shutdown()
        self.task = None
        self.env = None
        return True

    def name(self) -> str:
        return self._task_name

    # ------------- private methods ------------- #

    def _update_info_dict(self):
        # update info dict
        self._info["action mode"] = self._action_mode
        self._info["observation mode"] = self._observation_mode
        # TODO: action dim should related to robot, not action mode, here we fixed it temporally
        self._info["action dim"] = (self._action_config.action_size, )
        self._info["action low"] = -np.ones(self._info["action dim"],
                                            dtype=np.float32)
        self._info["action high"] = np.ones(self._info["action dim"],
                                            dtype=np.float32)
        if self._observation_mode == "state" or self._observation_mode == "all":
            # TODO: observation should be determined without init the entire environment
            with suppress_stdout():
                env = RLEnvironment(action_mode=self._action_config,
                                    obs_config=self._observation_config,
                                    headless=True,
                                    robot_configuration=self._robot_name)
                env.launch()
                task = env.get_task(get_named_class(self._task_name, tasks))
                _, obs = task.reset()
                env.shutdown()
                del task
                del env
            self._info["time step"] = _DT
            self._info["state dim"] = tuple(obs.get_low_dim_data().shape)
            self._info["state low"] = np.ones(self._info["state dim"],
                                              dtype=np.float32) * -np.inf
            self._info["state high"] = np.ones(self._info["state dim"],
                                               dtype=np.float32) * np.inf
        if self._observation_mode == "vision" or self._observation_mode == "all":
            self._info["left shoulder rgb dim"] = tuple(
                self._observation_config.left_shoulder_camera.image_size) + (
                    3, )
            self._info["left shoulder rgb low"] = np.zeros(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["left shoulder rgb high"] = np.ones(
                self._info["left shoulder rgb dim"], dtype=np.float32)
            self._info["right shoulder rgb  dim"] = tuple(
                self._observation_config.right_shoulder_camera.image_size) + (
                    3, )
            self._info["right shoulder rgb  low"] = np.zeros(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["right shoulder rgb  high"] = np.ones(
                self._info["right shoulder rgb  dim"], dtype=np.float32)
            self._info["wrist rgb dim"] = tuple(
                self._observation_config.wrist_camera.image_size) + (3, )
            self._info["wrist rgb low"] = np.zeros(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
            self._info["wrist rgb high"] = np.ones(self._info["wrist rgb dim"],
                                                   dtype=np.float32)
        self._info["reward low"] = -np.inf
        self._info["reward high"] = np.inf

    def live_demo(self, amount: int, random: bool = True) -> SampleBatch:
        """
        :param amount: number of demonstration trajectories to generate
        :param random: whether the starting positions are randomised
        :return: observation list: [amount x [(steps-1) x [s, a] + [s_term, None]]].
                 WARNING: the actions here are reconstructed from observations;
                 executing them may introduce some inaccuracy.
        """
        seeds = [rnd.randint(0, 4096) if random else 0 for _ in range(amount)]
        self.task._static_positions = not random

        demo_pack = []
        for seed in seeds:
            np.random.seed(seed)
            pack = self.task.get_demos(1, True)[0]

            demo_traj = []
            np.random.seed(seed)
            desc, obs = self.task.reset()
            v_tar = 0.
            for o_tar in pack[1:]:
                action = []
                if self._action_config.arm == ArmActionMode.ABS_JOINT_VELOCITY:
                    action.extend(
                        (o_tar.joint_positions - obs.joint_positions) / _DT)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_POSITION:
                    action.extend(o_tar.joint_positions)
                elif self._action_config.arm == ArmActionMode.ABS_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces)
                    raise TypeError(
                        "Warning, abs_joint_torque is not currently supported")
                elif self._action_config.arm == ArmActionMode.ABS_EE_POSE:
                    action.extend(o_tar.gripper_pose)
                elif self._action_config.arm == ArmActionMode.ABS_EE_VELOCITY:
                    # WARNING: this calculation is inexact, since rotations
                    # cannot simply be added together; the original RLBench
                    # does it this way, so we follow suit.
                    action.extend(
                        (o_tar.gripper_pose - obs.gripper_pose) / _DT)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_VELOCITY:
                    v_tar = (o_tar.joint_positions - obs.joint_positions) / _DT
                    action.extend(v_tar - obs.joint_velocities)
                    raise TypeError(
                        "Warning, delta_joint_velocity is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_POSITION:
                    action.extend(o_tar.joint_positions - obs.joint_positions)
                elif self._action_config.arm == ArmActionMode.DELTA_JOINT_TORQUE:
                    action.extend(o_tar.joint_forces - obs.joint_forces)
                    raise TypeError(
                        "Warning, delta_joint_torque is not currently supported"
                    )
                elif self._action_config.arm == ArmActionMode.DELTA_EE_POSE:
                    action.extend(o_tar.gripper_pose[:3] -
                                  obs.gripper_pose[:3])
                    q = Quaternion(o_tar.gripper_pose[3:7]) * Quaternion(
                        obs.gripper_pose[3:7]).conjugate
                    action.extend(list(q))
                elif self._action_config.arm == ArmActionMode.DELTA_EE_VELOCITY:
                    # WARNING: this calculation is inexact, since rotations
                    # cannot simply be added together; the original RLBench
                    # does it this way, so we follow suit.
                    v_tar_new = (o_tar.gripper_pose - obs.gripper_pose) / _DT
                    action.extend(v_tar_new - v_tar)
                    v_tar = v_tar_new
                    raise TypeError(
                        "Warning, delta_ee_velocity is not currently supported"
                    )

                action.append(1.0 if o_tar.gripper_open > 0.9 else 0.0)
                action = np.asarray(action, dtype=np.float32)
                demo_traj.append({
                    'observation': obs,
                    'a': action,
                    's': obs.get_low_dim_data()
                })
                obs, reward, done = self.task.step(action)
                demo_traj[-1]['r'] = reward

            demo_pack.append(demo_traj)
        return {
            "trajectory": demo_pack,
            "config": "default",
            "policy": "hand-coding",
            "env class": self.__class__.__name__,
            "env name": self._task_name,
            "env config": "default",
            "observation config": self._observation_mode,
            "robot config": self._robot_name,
            "action config": self._action_mode
        }
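
A hypothetical driver for the class above, showing the init/reset/step/finalize cycle (variable names are illustrative):

# Sketch only.
env = FakeRLBenchEnv('ReachTarget', observation_mode='state',
                     action_mode='delta joint position')
env.init(display=False)
first = env.reset()
action = np.zeros(env._info["action dim"], dtype=np.float32)
last_step, next_step, terminate = env.step({'a': action})
env.finalize()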