Beispiel #1
0
    def __init__(self, env_params_dict, reset_state=None):
        hand_low=(-0.2, 0.4, 0.0)
        hand_high=(0.2, 0.8, 0.05)
        obj_low=(-0.3, 0.4, 0.1)
        obj_high=(0.3, 0.8, 0.3)

        dirname = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        params_dict = copy.deepcopy(env_params_dict)
        _hp = self._default_hparams()
        for name, value in params_dict.items():
            print('setting param {} to value {}'.format(name, value))
            _hp.set_hparam(name, value)

        filename = os.path.join(dirname, "assets/sawyer_xyz/sawyer_noobj.xml")

        BaseMujocoEnv.__init__(self, filename, _hp)
        SawyerXYZEnv.__init__(
                self,
                frame_skip=5,
                action_scale=1./10,
                hand_low=hand_low,
                hand_high=hand_high,
                model_name=filename
            )
        goal_low = self.hand_low
        goal_high = self.hand_high
        self._adim = 4
        self._hp = _hp
        self.liftThresh = 0.04
        self.max_path_length = 100
        self.hand_init_pos = np.array((0, 0.6, 0.0))
Beispiel #2
0
    def __init__(self, env_params_dict, reset_state=None):
        hand_low = (0.05, -0.75, 0.73)
        hand_high = (0.6, -0.48, 1.0)
        obj_low = (0.15, -0.4, 0.6)
        obj_high = (0.4, -0.6, 0.6)

        dirname = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        params_dict = copy.deepcopy(env_params_dict)
        _hp = self._default_hparams()
        for name, value in params_dict.items():
            print('setting param {} to value {}'.format(name, value))
            _hp.set_hparam(name, value)

        filename = os.path.join(dirname,
                                "./playrooms/rooms/playroom/playroom.xml")

        BaseMujocoEnv.__init__(self, filename, _hp)
        SawyerXYZEnv.__init__(self,
                              frame_skip=15,
                              action_scale=1. / 5,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=filename)
        goal_low = self.hand_low
        goal_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        self._adim = 4
        self._hp = _hp
        self.liftThresh = 0.04
        self.max_path_length = 100
        self.hand_init_pos = np.array([0.6, -0.48, 1.0])
Beispiel #3
0
    def __init__(self,
                 random_init=True,
                 goal_low=(0.4, 0.55, 0.02),
                 goal_high=(0.4, 0.6, 0.02),
                 liftThresh=0.04,
                 rotMode='fixed',
                 rewMode='orig',
                 obs_type='with_goal_init_obs',
                 **kwargs):

        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.08, 0.58, 0.02)
        obj_high = (-0.03, 0.62, 0.02)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        self.init_config = {
            'stick_init_pos': np.array([-0.1, 0.6, 0.02]),
            'hand_init_pos': np.array([0, 0.6, 0.2]),
        }
        self.goal = self.init_config['stick_init_pos']
        self.stick_init_pos = self.init_config['stick_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE

        self.obs_type = obs_type
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200
        self.rewMode = rewMode
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )

        # For now, fix the object initial position.
        self.obj_init_pos = np.array([0.2, 0.6, 0.04])
        self.obj_init_qpos = np.array([0.0, 0.0])
        self.obj_space = Box(np.array(obj_low), np.array(obj_high))
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.obs_type = obs_type
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low)),
                np.hstack((self.hand_high, obj_high, obj_high)),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, obj_high, goal_high)),
            )
        elif self.obs_type == 'with_goal_init_obs':
            self.observation_space = Box(
                np.hstack(
                    (self.hand_low, obj_low, obj_low, obj_low, goal_low)),
                np.hstack(
                    (self.hand_high, obj_high, obj_high, obj_high, goal_high)),
            )
            self.goal_len = self.obj_and_goal_space.low.shape[0]
        else:
            raise NotImplementedError
        self.reset()
    def __init__(self,
                 random_init=True,
                 obs_type='with_goal',
                 goal_low=(-0.1, 0.75, 0.17),
                 goal_high=(0.1, 0.85, 0.17),
                 liftThresh=0.05,
                 rewMode='orig',
                 rotMode='fixed',
                 **kwargs):
        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (0.1, 0.75, 0.02)
        obj_high = (0., 0.85, 0.02)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        self.init_config = {
            'obj_init_angle': 0.3,
            'obj_init_pos': np.array([0, 0.8, 0.02]),
            'hand_init_pos': np.array((0, 0.6, 0.2), dtype=np.float32),
        }
        self.goal = np.array([0, 0.8, 0.17])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200
        self.rewMode = rewMode
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
Beispiel #5
0
    def __init__(self,
                 random_init=False,
                 goal_low=(-0.1, 0.6, 0.15),
                 goal_high=(0.1, 0.7, 0.3),
                 liftThresh=0.11,
                 obs_type='with_goal',
                 rotMode='fixed',
                 rewMode='orig',
                 timestep=0.0025,
                 **kwargs):
        self.quick_init(locals())
        hand_low = (-0.5, 0.40, -0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (0, 0.84, -0.03)
        obj_high = (0, 0.84, -0.03)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        self.init_config = {
            'obj_init_pos': np.array([0, 0.84, -0.03]),
            'obj_init_angle': 0.3,
            'hand_init_pos': np.array([0., .6, .2]),
        }
        self.goal = np.array([0., 0.6, 0.2])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 200
        self.rotMode = rotMode
        self.rewMode = rewMode
        self.liftThresh = liftThresh
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low)),
                np.hstack((self.hand_high, obj_high)),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError

        self.imsize = 256
        self.camera = create_sawyer_camera_init(distance=0.6, elevation=-45)
        self.initialize_camera(self.camera)
        self.reset()
Beispiel #6
0
    def __init__(self,
                 obs_type='with_goal',
                 random_init=True,
                 goal_low=None,
                 goal_high=None,
                 rotMode='fixed',
                 timestep=0.0025,
                 params='fixed_goal_unconstrainted',
                 reward_delay=1,
                 **kwargs):
        self.quick_init(locals())
        self.reward_delay = reward_delay
        self.count = 0
        random_init = param[params]['random_init']
        hand_low = param[params]['hand_low']
        hand_high = param[params]['hand_high']
        self.init_config = param[params]['init_config']
        obj_low = (-0.1, 0.9, 0.04)
        obj_high = (0.1, 0.9, 0.04)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        self.goal = np.array([0., 0.55, 0.04])

        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type
        self.random_init = random_init
        self.max_path_length = 150
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
    def __init__(self,
                 hand_low=(-0.5, 0.40, 0.1),
                 hand_high=(0.5, 1, 0.55),
                 obj_low=None,
                 obj_high=None,
                 random_init=False,
                 tasks=[{
                     'goal': np.array([0.1, 0.8, 0.2]),
                     'obj_init_pos': np.array([0, 0.7, 0.02]),
                     'obj_init_angle': 0.3
                 }],
                 goal_low=None,
                 goal_high=None,
                 hand_init_pos=(0, 0.6, 0.31),
                 liftThresh=0.04,
                 rewMode='orig',
                 rotMode='quat',
                 **kwargs):

        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        if rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.goal_space = Box(goal_low, goal_high)
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low)),
            np.hstack((self.hand_high, obj_high, obj_high)),
        )
Beispiel #8
0
    def __init__(self,
                 random_init=True,
                 goal_low=(-0.25, 0.6, 0.05),
                 goal_high=(-0.15, 0.8, 0.05),
                 liftThresh=0.04,
                 obs_type='with_goal',
                 rotMode='fixed',
                 timestep=0.0025,
                 params='random_goal_unconstrainted',
                 reward_delay=1,
                 rewMode='orig',
                 **kwargs):
        self.quick_init(locals())
        self.reward_delay = reward_delay
        self.count = 0
        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.25, 0.6, 0.05)
        obj_high = (-0.15, 0.8, 0.05)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        self.init_config = {
            'obj_init_pos': np.array([-0.225, 0.6, 0.05]),
            'hand_init_pos': np.array(((0, 0.6, 0.2))),
        }
        self.goal = np.array([-0.225, 0.6, 0.05])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.obs_type = obs_type
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low)),
                np.hstack((self.hand_high, obj_high)),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
Beispiel #9
0
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.04,
            rewMode='orig',
            rotMode='rotz',
            problem="rand",
            door=0,  # 0: not using door env, 1: using default door, 3: door w/ 3 distractors, 5: door w/ 5 distractors
            drawer=False,
            exploration="hard",
            filepath="test",
            max_path_length=50,
            verbose=1,
            hard=False,
            log_freq=100,  # in terms of episode num
            **kwargs):
        self.randomize = False
        self.door = door  # non zero, use door env (w/ varying distractors)
        self.hard = hard  # if True, blocks are initialized to diff corners
        self.drawer = drawer
        self.exploration = exploration
        self.max_path_length = max_path_length
        self.cur_path_length = 0
        self.quick_init(locals())
        hand_low = (-0.2, 0.4, 0.0)
        hand_high = (0.2, 0.8, 0.20)
        obj_low = (-0.3, 0.4, 0.1)
        obj_high = (0.3, 0.8, 0.3)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 20,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.liftThresh = liftThresh
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.action_rot_scale = 1. / 10
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi, -1]),
            np.array([1, 1, 1, np.pi, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )

        self.imsize = 64
        self.observation_space = Box(0, 1.0, (self.imsize * self.imsize * 3, ))
        self.goal_space = self.observation_space
        '''For Logging'''
        self.verbose = verbose
        if self.verbose:
            self.imgs = []
            self.filepath = filepath
            if not os.path.exists(self.filepath):
                os.mkdir(self.filepath)
        self.log_freq = log_freq
        self.epcount = 0  # num episodes so far
        self.good_qpos = None  # self.data.qpos[:7]
Beispiel #10
0
    def __init__(
            self,
            random_init=False,
            task_types=['pick_place', 'reach', 'push'],
            task_type='pick_place',
            obs_type='plain',
            goal_low=(-0.1, 0.8, 0.05),
            goal_high=(0.1, 0.9, 0.3),
            liftThresh=0.04,
            sampleMode='equal',
            rotMode='fixed',  #'fixed',
            **kwargs):
        self.quick_init(locals())

        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.02, 0.58, 0.02)
        obj_high = (0.02, 0.62, 0.02)

        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        self.task_type = task_type
        self.init_config = {
            'obj_init_angle': .3,
            'obj_init_pos': np.array([0, 0.6, 0.02]),
            'hand_init_pos': np.array([0, .6, .2]),
        }
        # we only do one task from [pick_place, reach, push]
        # per instance of SawyerReachPushPickPlaceEnv.
        # Please only set task_type from constructor.
        if self.task_type == 'pick_place':
            self.goal = np.array([0.1, 0.8, 0.2])
        elif self.task_type == 'reach':
            self.goal = np.array([-0.1, 0.8, 0.2])
        elif self.task_type == 'push':
            self.goal = np.array([0.1, 0.8, 0.02])
        else:
            raise NotImplementedError
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 150
        self.rotMode = rotMode
        self.sampleMode = sampleMode
        self.task_types = task_types
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError('If you want to use an observation\
                with_obs_idx, please discretize the goal space after instantiate an environment.'
                                      )
        self.num_resets = 0
        self.reset()
Beispiel #11
0
    def __init__(
            self,
            hand_low=(-0.1, 0.5, 0.02),
            hand_high=(0.1, 1, 0.15),
            obj_low=None,
            obj_high=None,
            random_init=False,
            tasks = [{'goal': np.array([0, 0.77, 0.02]),  'obj_init_pos':np.array([0, 0.6, 0.02]), 'obj_init_angle': 0.3}],
            goal_low=None,
            goal_high=None,
            hand_init_pos = (0, 0.6, 0.06),
            liftThresh = 0.04,
            rewMode = 'dense',
            rotMode='fixed',#'fixed',
            timestep=0.0025,
            **kwargs
    ):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(
            self,
            frame_skip=5,
            action_scale=1./100,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200#150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1./50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
                np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, obj_low)),
            np.hstack((obj_low, obj_high)),
        )
        self.goal_space = Box(goal_low, goal_high)
        self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low)),
                np.hstack((self.hand_high, obj_high, obj_high)),
        )
        # self.observation_space = Dict([
        #     ('state_observation', self.hand_and_obj_space),
        #     ('state_desired_goal', self.goal_space),
        #     ('state_achieved_goal', self.goal_space),
        # ])
        self._get_reference()
        self.reset()
Beispiel #12
0
    def __init__(self,
                 random_init=False,
                 obs_type='plain',
                 goal_low=None,
                 goal_high=None,
                 rotMode='fixed',
                 **kwargs):
        self.quick_init(locals())
        hand_low = (-0.3, 0.40, 0.05)
        hand_high = (0.3, 1, 0.05)
        obj_low = (0., 0.85, 0.1)
        obj_high = (0.1, 0.95, 0.1)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 20,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        self.init_config = {
            'obj_init_angle': np.array([
                0.3,
            ], dtype=np.float32),
            'obj_init_pos': np.array([0.1, 0.95, 0.1], dtype=np.float32),
            'hand_init_pos': np.array([0, 0.6, 0.2], dtype=np.float32),
        }
        self.problem = "rand"

        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 100

        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
Beispiel #13
0
    def __init__(
            self,
            random_init=False,
            obs_type='with_goal',
            goal_low=None,
            goal_high=None,
            rotMode='fixed',
            timestep=0.0025,
            params='fixed_goal_unconstrainted',
            reward_delay=1,
            **kwargs
    ):
        self.quick_init(locals())
        self.reward_delay = reward_delay
        self.count = 0
        random_init=param[params]['random_init']
        hand_low=param[params]['hand_low']
        hand_high=param[params]['hand_high']
        self.init_config = param[params]['init_config']
        obj_low=(0., 0.85, 0.1)
        obj_high=(0.1, 0.95, 0.1)
        SawyerXYZEnv.__init__(
            self,
            frame_skip=5,
            action_scale=1./100,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        self.imsize = 256

        self.camera = create_sawyer_camera_init(distance=0.6)
        self.initialize_camera(self.camera)
        self.goal = np.array([-0.2, 0.7, 0.15])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 150

        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1./50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
                np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low,)),
                np.hstack((self.hand_high, obj_high,)),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        self.reset()
    def __init__(
            self,
            random_init=True,
            obs_type='with_goal',
            # tasks = [{'goal': np.array([0., 0.6, 0.02]),  'obj_init_pos':np.array([0., 0.8, 0.001]), 'obj_init_angle': 0.3}],
            # goal_low=(-0.1, 0.6, 0.02),
            # goal_high=(0.1, 0.6, 0.02),
            tasks=[{
                'goal': np.array([0., 0.92, 0.02]),
                'obj_init_pos': np.array([0., 0.8, 0.081]),
                'obj_init_angle': 0.3
            }],
            goal_low=(0, 0.92, 0.02),
            goal_high=(0, 0.92, 0.02),
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.1,
            rewMode='orig',
            rotMode='fixed',  #'fixed',
            multitask=False,
            multitask_num=1,
            **kwargs):

        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.75, 0.081)
        obj_high = (0.1, 0.8, 0.081)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        assert obs_type in OBS_TYPE
        if multitask:
            obs_type = 'with_goal_and_id'
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200  #200#150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        self.multitask_num = multitask_num
        self._state_goal_idx = np.zeros(multitask_num)
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if not multitask and self.obs_type == 'with_goal_id':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
                np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
            )
        elif not multitask and self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif not multitask and self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low,
                           np.zeros(multitask_num))),
                np.hstack((self.hand_high, obj_high, goal_high,
                           np.zeros(multitask_num))),
            )
        self.reset()
Beispiel #15
0
    def __init__(self,
                 random_init=False,
                 obs_type='plain',
                 goal_low=(0., 0.85, 0.05),
                 goal_high=(0.3, 0.9, 0.05),
                 liftThresh=0.09,
                 rotMode='fixed',
                 rewMode='orig',
                 **kwargs):
        self.quick_init(locals())
        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.5, 0.02)
        obj_high = (0.1, 0.6, 0.02)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        # TODO should we put this to goal instead of initial config?
        self.init_config = {
            'hammer_init_pos': np.array([0, 0.6, 0.02]),
            'hand_init_pos': np.array([0, 0.6, 0.2]),
        }
        self.goal = self.init_config['hammer_init_pos']  # TODO: check this
        self.hammer_init_pos = self.init_config['hammer_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200
        self.rewMode = rewMode
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
Beispiel #16
0
    def __init__(self,
                 random_init=False,
                 obs_type='plain',
                 goal_low=None,
                 goal_high=None,
                 rotMode='fixed',
                 **kwargs):
        self.quick_init(locals())
        hand_low = (-0.5, 0.40, 0.05)
        hand_high = (0.5, 1, 0.5)
        obj_low = (-0.1, 0.7, 0.05)
        obj_high = (0.1, 0.8, 0.05)
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        self.init_config = {
            'obj_init_pos': np.array([0, 0.7, 0.05]),
            'hand_init_pos': np.array([0, 0.6, 0.2], dtype=np.float32),
        }
        self.goal = np.array([0., 0.73, 0.08])
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 150
        self.rotMode = rotMode
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError
        self.reset()
Beispiel #17
0
    def __init__(
            self,
            random_init=False,
            task_types=['pick_place', 'reach', 'push'],
            task_type='push',
            obs_type='with_goal',
            goal_low=(-0.1, 0.8, 0.05),
            goal_high=(0.1, 0.9, 0.3),
            liftThresh=0.04,
            sampleMode='equal',
            rewMode='orig',
            timestep=0.0025,
            rotMode='fixed',  #'fixed',
            reward_delay=1,
            params='fixed_goal_unconstrainted',
            visual=False,
            **kwargs):
        self.quick_init(locals())
        self.reward_delay = reward_delay
        self.count = 0
        random_init = param[params]['random_init']
        hand_low = param[params]['hand_low']
        hand_high = param[params]['hand_high']
        self.init_config = param[params]['init_config']
        obj_low = (-0.1, 0.6, 0.02)
        obj_high = (0.1, 0.7, 0.02)

        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        self.task_type = task_type

        # we only do one task from [pick_place, reach, push]
        # per instance of SawyerReachPushPickPlaceEnv.
        # Please only set task_type from constructor.
        if self.task_type == 'pick_place':
            self.goal = np.array([0.1, 0.8, 0.2])
        elif self.task_type == 'reach':
            self.goal = np.array([-0.1, 0.8, 0.2])
        elif self.task_type == 'push':
            self.goal = np.array([0.1, 0.8, 0.02])
        else:
            raise NotImplementedError
        self.obj_init_angle = self.init_config['obj_init_angle']
        self.obj_init_pos = self.init_config['obj_init_pos']
        self.hand_init_pos = self.init_config['hand_init_pos']

        assert obs_type in OBS_TYPE
        self.obs_type = obs_type

        if goal_low is None:
            goal_low = self.hand_low

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 150
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.sampleMode = sampleMode
        self.task_types = task_types
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1]),
                np.array([1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            raise NotImplementedError('If you want to use an observation\
                with_obs_idx, please discretize the goal space after instantiate an environment.'
                                      )
        self.num_resets = 0
        self.sim.model.opt.timestep = timestep
        self.imsize = 84
        self.visual = visual
        if self.visual:
            high = np.ones((self.imsize, self.imsize, 3))
            low = 0 * high
            self.observation_space = Box(low, high)
        self.sim.model.opt.timestep = timestep
        self.reset()