def __init__(self,
                 reward_type='hand_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.3),
                 hide_goal_markers=False,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.hide_goal_markers = hide_goal_markers
        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.observation_space = Dict([
            ('observation', self.hand_space),
            ('desired_goal', self.hand_space),
            ('achieved_goal', self.hand_space),
            ('state_observation', self.hand_space),
            ('state_desired_goal', self.hand_space),
            ('state_achieved_goal', self.hand_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
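
These constructors only wire up spaces and reward settings; the reward itself is computed elsewhere from achieved and desired goals. A minimal sketch of the 'hand_distance' convention, assuming a hypothetical helper that uses norm_order and indicator_threshold the way the fields above suggest:

import numpy as np

def hand_distance_reward(achieved_goal, desired_goal, norm_order=1,
                         indicator_threshold=0.06):
    # Distance between achieved and desired hand positions, using the same
    # norm_order convention as the constructor above.
    distance = np.linalg.norm(achieved_goal - desired_goal, ord=norm_order)
    # indicator_threshold turns the distance into a sparse success flag.
    return -distance, distance < indicator_threshold

reward, success = hand_distance_reward(np.array([0.15, 0.6, 0.3]),
                                       np.array([0.10, 0.55, 0.3]))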
# Example 2
    def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
    ):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(
            self,
            self.model_name,
            hand_low=hand_low,
            hand_high=hand_high,
            **sawyer_xyz_kwargs
        )
        MultitaskEnv.__init__(self)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.state_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        self.reset()
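
A hedged sketch of how the scale parameters above might combine into an 'angle_difference'-style reward. The function door_reward and its weighting are illustrative assumptions, not the env's actual formula:

import numpy as np

def door_reward(hand_pos, goal_pos, door_angle, goal_angle, action,
                target_pos_scale=1, target_angle_scale=1,
                action_reward_scale=0):
    # Penalize hand-to-goal distance, door-angle error, and action magnitude,
    # each weighted by the corresponding constructor scale.
    pos_cost = np.linalg.norm(hand_pos - goal_pos)
    angle_cost = abs(door_angle - goal_angle)
    action_cost = np.linalg.norm(action)
    return -(target_pos_scale * pos_cost
             + target_angle_scale * angle_cost
             + action_reward_scale * action_cost)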
    def __init__(self,
                 puck_low=None,
                 puck_high=None,
                 reward_type='hand_and_puck_distance',
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
                 goal_low=None,
                 goal_high=None,
                 hide_goal_markers=False,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        goal_low = np.array(goal_low)
        goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
        )
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = self.get_puck_pos()[2]
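
The fix_goal / fixed_goal pair recurs across these constructors. A standalone sketch of the goal-sampling behavior they imply (the helper sample_goal is hypothetical):

import numpy as np

def sample_goal(fix_goal, fixed_goal, goal_low, goal_high, rng=None):
    # With fix_goal=True the env always uses fixed_goal; otherwise goals
    # are drawn uniformly from the [goal_low, goal_high] box.
    rng = rng or np.random.default_rng()
    if fix_goal:
        return np.array(fixed_goal)
    return rng.uniform(np.array(goal_low), np.array(goal_high))

goal = sample_goal(False, (0.15, 0.6, 0.055, -0.15, 0.6),
                   goal_low=np.zeros(5), goal_high=np.ones(5))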
# Example 4
    def __init__(self,
                 obj_low=None,
                 obj_high=None,
                 reward_type='hand_and_obj_distance',
                 indicator_threshold=0.06,
                 obj_init_pos=(0, 0.65, 0.03),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
                 goal_low=None,
                 goal_high=None,
                 hide_goal_markers=False,
                 hide_arm=False,
                 num_objects=1,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        self.hide_arm = hide_arm
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        self.num_objects = num_objects
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, np.tile(obj_low,
                                                         num_objects)))
        if goal_high is None:
            goal_high = np.hstack(
                (self.hand_high, np.tile(obj_high, num_objects)))

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
            np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
        )
        self.observation_space = Dict([
            ('observation', self.hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            obj_init_pos=(0, 0.6, 0.02),
            fix_goal=True,
            fixed_goal=(0, 0.85, 0.12, 0.1),
            # 3D placing goal, followed by height target for picking
            goal_low=None,
            goal_high=None,
            hide_goal_markers=False,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.max_path_length = 200

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.observation_space = Dict([
            ('observation', self.hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])

        self.reset()
    def __init__(self,
                 doorGrasp_low=None,
                 doorGrasp_high=None,
                 tasks=[{
                     'goalAngle': [1.0472],
                     'door_init_pos': [0, 1, 0.3]
                 }],
                 goal_low=np.array([0]),
                 goal_high=np.array([1.58825]),
                 hand_init_pos=(0, 0.4, 0.1),
                 fixed_door_pos=(0, 1.0, 0.3),
                 image=False,
                 doorHalfWidth=0.2,
                 mpl=100,
                 **kwargs):

        self.quick_init(locals())

        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if doorGrasp_low is None:
            doorGrasp_low = self.hand_low
        if doorGrasp_high is None:
            doorGrasp_high = self.hand_high

        self.max_path_length = mpl

        self.doorHalfWidth = doorHalfWidth
        self.fixed_door_pos = np.array(fixed_door_pos)
        self.hand_init_pos = np.array(hand_init_pos)
        self.info_logKeys = ['angleDelta', 'doorAngle', 'doorAngleTarget']

        self.tasks = np.array(tasks)
        self.num_tasks = len(tasks)

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.hand_and_door_space = Box(np.hstack(
            (self.hand_low, doorGrasp_low)),
                                       np.hstack(
                                           (self.hand_high, doorGrasp_high)),
                                       dtype=np.float32)

        self.goal_space = Box(goal_low, goal_high)

        self.observation_space = Dict([
            ('state_observation', self.hand_and_door_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space)
        ])
    def __init__(self,
                 obj_low=None,
                 obj_high=None,
                 obj_init_pos=(0, 0.6, 0.02),
                 goals=[[0, 0.7, 0.02]],
                 goal_low=None,
                 goal_high=None,
                 hand_init_pos=(0, 0.4, 0.25),
                 blockSize=0.02,
                 **kwargs):
        self.quick_init(locals())

        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.max_path_length = 150

        self.goals = goals
        self.num_goals = len(goals)

        self.obj_init_pos = np.array(obj_init_pos)
        self.hand_init_pos = np.array(hand_init_pos)

        self.blockSize = blockSize

        self.action_space = Box(
            np.array([-1, -1, -1, -1, -1]),
            np.array([1, 1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )

        self.goal_space = Box(goal_low, goal_high)

        self.observation_space = Dict([('state_observation',
                                        self.hand_and_obj_space),
                                       ('desired_goal', self.goal_space)])

        self.reset()
# Example 8
    def __init__(
            self,
            puck_low=(-.35, .25),
            puck_high=(.35, .95),

            norm_order=1,
            indicator_threshold=0.06,
            touch_threshold=0.1,  # I just chose this number after doing a few runs and looking at a histogram

            hand_low=(-0.28, 0.3, 0.05),
            hand_high=(0.28, 0.9, 0.3),

            fix_goal=True,
            fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
            goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
            goal_high=(0.25, 0.875, 0.02, .2, .8),

            init_puck_z=0.035,
            init_hand_xyz=(0, 0.4, 0.07),

            reset_free=False,
            xml_path='sawyer_xyz/sawyer_push_puck.xml',
            clamp_puck_on_step=False,

            puck_radius=.07,
            **kwargs
    ):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)

        self.goal_type = kwargs.pop('goal_type', 'puck')
        self.dense_reward = kwargs.pop('dense_reward', False)
        self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
        self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
        self.task_agnostic = kwargs.pop('task_agnostic', False)
        self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
        self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))

        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]

        self.puck_low = np.array(puck_low)
        self.puck_high = np.array(puck_high)

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.norm_order = norm_order
        self.touch_threshold = touch_threshold
        self.fix_goal = fix_goal

        self._state_goal = None

        self.hide_goal_markers = self.task_agnostic

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
            dtype=np.float32
        )
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])

        self.init_puck_z = init_puck_z
        self._set_puck_xy(self.sample_puck_xy())
        self.reset_free = reset_free
        self.reset_counter = 0
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.clamp_puck_on_step = clamp_puck_on_step
        self.puck_radius = puck_radius
        self.reset()
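
This constructor pops its custom keys out of **kwargs before forwarding the rest to SawyerXYZEnv.__init__, so the parent never sees options it does not understand. A tiny self-contained illustration of that pattern (child_init is a made-up stand-in):

def child_init(**kwargs):
    # Custom keys are consumed here so the remaining kwargs can be
    # forwarded safely to the parent constructor.
    goal_type = kwargs.pop('goal_type', 'puck')
    dense_reward = kwargs.pop('dense_reward', False)
    return goal_type, dense_reward, kwargs

print(child_init(goal_type='hand', frame_skip=5))
# -> ('hand', False, {'frame_skip': 5})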
# Example 9
    def __init__(self,
                 block_low=(-0.2, 0.55, 0.02),
                 block_high=(0.2, 0.75, 0.02),
                 hand_low=(0.0, 0.55, 0.3),
                 hand_high=(0.0, 0.55, 0.3),
                 stack_goal_low=(-0.2, 0.55, 0.02),
                 stack_goal_high=(0.2, 0.75, 0.02),
                 fix_goal=False,
                 fixed_stack_goal=(0.0, 0.55, 0.02),
                 fixed_hand_goal=(0.0, 0.75, 0.3),
                 use_sparse_reward=False,
                 sparse_reward_threshold=0.05,
                 reset_free=False,
                 hide_goal_markers=False,
                 oracle_reset_prob=0.0,
                 xml_path='sawyer_xyz/three_blocks.xml',
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              model_name=get_asset_full_path(xml_path),
                              **kwargs)

        self.block_low = np.array(block_low)
        self.block_high = np.array(block_high)
        self.block_radius = 0.02

        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        self.stack_goal_low = np.array(stack_goal_low)
        self.stack_goal_high = np.array(stack_goal_high)

        self.max_place_distance = max(
            np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
            np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
        )

        self.fix_goal = fix_goal
        self.fixed_stack_goal = np.array(fixed_stack_goal)

        self.use_sparse_reward = use_sparse_reward
        self.sparse_reward_threshold = sparse_reward_threshold

        self.reset_free = reset_free
        self.hide_goal_markers = hide_goal_markers
        self.oracle_reset_prob = oracle_reset_prob

        self.action_space = Box(np.array([-1, -1, -1, -1]),
                                np.array([1, 1, 1, 1]),
                                dtype=np.float32)

        diff = (self.block_high[0] - self.block_low[0]) / 3.0
        midpoint = diff + self.block_low[0]
        block_one_high = [midpoint, *self.block_high[1:]]
        block_two_low = [midpoint, *self.block_low[1:]]
        midpoint = midpoint + diff
        block_two_high = [midpoint, *self.block_high[1:]]
        block_three_low = [midpoint, *self.block_low[1:]]

        self.sampling_space = Box(np.hstack(
            ([0.0], self.hand_low, self.block_low, block_two_low,
             block_three_low)),
                                  np.hstack(
                                      ([0.04], self.hand_high, block_one_high,
                                       block_two_high, self.block_high)),
                                  dtype=np.float32)

        self.gripper_and_hand_and_blocks_space = Box(
            np.hstack(([0.0], self.hand_low, self.block_low, self.block_low,
                       self.block_low)),
            np.hstack(([0.04], self.hand_high, self.block_high,
                       self.block_high, self.block_high)),
            dtype=np.float32)

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_blocks_space),
            ('desired_goal', self.gripper_and_hand_and_blocks_space),
            ('achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('state_observation', self.gripper_and_hand_and_blocks_space),
            ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_observation', self.gripper_and_hand_and_blocks_space),
            ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ])

        self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
        self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
        self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
        self.reset()
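
The sampling_space above splits the block x-range into thirds so each of the three blocks is drawn from its own non-overlapping slot. A standalone recomputation of those slot boundaries, using the default block bounds:

block_low_x, block_high_x = -0.2, 0.2
diff = (block_high_x - block_low_x) / 3.0
slots = [(block_low_x + i * diff, block_low_x + (i + 1) * diff)
         for i in range(3)]
print(slots)  # [(-0.2, -0.0667), (-0.0667, 0.0667), (0.0667, 0.2)]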
# Example 10
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            #tasks = [{'task': 'drawer' , 'obj_pos': np.array([0, 0.52 , 0.02]) , 'goal_pos': np.array([0, 0.81]) }] ,
            tasks=[{
                'task': 'drawer',
                'obj_init_pos': np.array([0, 0.52, 0.02]),  # placeholder: assumed from the commented-out task above
                'padded_target_pos': np.array([0.1, 0, 0]),
                'door_pos': np.array([0, 1.0, 0.3])  # placeholder: assumed; the original default referenced an undefined name
            }],
            #tasks = None,
            pushRew_weight=1,
            goal_low=np.array([-0.5, 0.4, 0.05, -np.inf]),  # 4th bound left unbounded; intended limit unclear
            goal_high=np.array([0.5, 1., 0.5, np.inf]),
            hand_init_pos=(0, 0.4, 0.1),
            doorHalfWidth=0.2,
            rewMode='posPlace',
            indicatorDist=0.05,
            image=False,
            image_dim=84,
            camera_name='angled_cam',
            mpl=150,
            hide_goal=True,
            change_task_every_episode=False,
            **kwargs):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        self.sparse = False
        self.pushRew_weight = pushRew_weight

        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high
        if goal_low is None:
            goal_low = self.hand_low

        #if doorGrasp_low is None:
        door_grasp_low = self.hand_low
        #if doorGrasp_high is None:
        door_grasp_high = self.hand_high

        self.push_task_id = 0
        self.door_task_id = 1
        self.drawer_task_id = 2

        self.doorHalfWidth = doorHalfWidth
        self.camera_name = camera_name
        self.objHeight = self.model.body_pos[-1][2]
        #assert self.objHeight != 0
        self.max_path_length = mpl
        self.image = image

        self.image_dim = image_dim

        self.tasks = np.array(tasks)
        self.change_task_every_episode = change_task_every_episode
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.Ind = indicatorDist
        self.hand_init_pos = np.array(hand_init_pos)
        self.action_space = Box(
            np.array([-1, -1, -1]),
            np.array([1, 1, 1]),
        )
        drawer_grasp_low = door_grasp_low
        drawer_grasp_high = door_grasp_high
        self.hand_env_space = Box(
            np.hstack(
                (self.hand_low, obj_low, door_grasp_low, drawer_grasp_low)),
            np.hstack((self.hand_high, obj_high, door_grasp_high,
                       drawer_grasp_high)),
        )
        self.goal_space = Box(goal_low, goal_high)
        #self.initialize_camera(sawyer_pusher_cam)
        self.push_info_logKeys = ['placeDist']
        self.door_info_logKeys = ['angleDelta']
        self.drawer_info_logKeys = ['posDelta']

        self.hide_goal = hide_goal
        if self.image:
            self.set_image_obsSpace()

        else:
            self.set_state_obsSpace()
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.05),
            hand_high=(0.5, 1, 0.5),
            obj_low=None,
            obj_high=None,
            random_init=False,
            tasks=[{'goal': np.array([0.1, 0.8, 0.2]), 'obj_init_pos': np.array([0, 0.6, 0.02]), 'obj_init_angle': 0.3}],
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.04,
            rewMode='orig',
            rotMode='rotz',  # alternative: 'fixed'
            **kwargs
    ):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(
            self,
            frame_skip=5,
            action_scale=1./100,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high
        
        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 200  # previously 150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1./50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
                np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
            )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.goal_space = Box(goal_low, goal_high)
        self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low)),
                np.hstack((self.hand_high, obj_high, obj_high)),
        )
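
For reference, the rotMode branches above imply these action dimensionalities. The component labels are a reading of the Box bounds (gripper last, rotation in the middle), not something stated in the env code:

# 'fixed': 4 dims (dx, dy, dz, gripper)
# 'rotz':  5 dims (dx, dy, dz, z-rotation, gripper)
# 'quat':  8 dims (dx, dy, dz, 4D quaternion, gripper)
# default: 7 dims (dx, dy, dz, 3 Euler angles, gripper)
ACTION_DIMS = {'fixed': 4, 'rotz': 5, 'quat': 8, 'euler': 7}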
# Example 12
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.02,

            fix_goal=False,
            fixed_goal=None,
            goal_low=None,
            goal_high=None,
            hard_goals=False,

            hide_goal_markers=True,
            presampled_goals=None,
            num_goals_presampled=1000,
            norm_order=2,
            structure='2d',

            two_obj=False,
            frame_skip=100,
            reset_p=None,
            goal_p=None,

            fixed_reset=None,
            hide_state_markers=True,

            test_mode_case_num=None,
            **kwargs
    ):
        self.quick_init(locals())
        # self.obj_id[0] for first object, self.obj_id[1] for second object.
        self.obj_id = {
            0: '',
            1: '2',
        }

        assert structure in ['2d', '3d', '2d_wall_short', '2d_wall_tall', '2d_wall_tall_dark']
        self.structure = structure
        self.two_obj = two_obj
        self.hard_goals = hard_goals
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            model_name=self.model_name,
            frame_skip=frame_skip,
            **kwargs
        )
        self.norm_order = norm_order
        if obj_low is None:
            obj_low = np.copy(self.hand_low)
            obj_low[2] = 0.0
        if obj_high is None:
            obj_high = np.copy(self.hand_high)
        self.obj_low = np.array(obj_low)
        self.obj_high = np.array(obj_high)
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.subgoals = None
        self.fix_goal = fix_goal
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers
        self.hide_state_markers = hide_state_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        if self.two_obj:
            self.hand_and_obj_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low)),
                np.hstack((self.hand_high, obj_high, obj_high)),
                dtype=np.float32
            )
        else:
            self.hand_and_obj_space = Box(
                np.hstack((self.hand_low, obj_low)),
                np.hstack((self.hand_high, obj_high)),
                dtype=np.float32
            )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        if self.two_obj:
            self.gripper_and_hand_and_obj_space = Box(
                np.hstack((self.hand_low, obj_low, obj_low, [0.0])),
                np.hstack((self.hand_high, obj_high, obj_high, [0.07])),
                dtype=np.float32
            )
        else:
            self.gripper_and_hand_and_obj_space = Box(
                np.hstack((self.hand_low, obj_low, [0.0])),
                np.hstack((self.hand_high, obj_high, [0.07])),
                dtype=np.float32
            )

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.gripper_and_hand_and_obj_space),
            ('achieved_goal', self.gripper_and_hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.gripper_and_hand_and_obj_space),
            ('state_achieved_goal', self.gripper_and_hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # if num_goals_presampled > 0, presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'

        self.obj_radius = np.array([0.018, 0.016]) #0.020
        self.ee_radius = np.array([0.053, 0.058]) #0.065
        if self.structure == '2d_wall_short':
            self.wall_radius = np.array([0.03, 0.02])
            self.wall_center = np.array([0.0, 0.60, 0.02])
        elif self.structure in ['2d_wall_tall', '2d_wall_tall_dark']:
            self.wall_radius = np.array([0.015, 0.04])
            self.wall_center = np.array([0.0, 0.60, 0.04])
        else:
            self.wall_radius = None
            self.wall_center = None
        # self.obj_radius = 0.020
        # self.ee_radius = 0.065
        if fixed_reset is not None:
            fixed_reset = np.array(fixed_reset)
        self.fixed_reset = fixed_reset
        if fixed_goal is not None:
            fixed_goal = np.array(fixed_goal)
        self.fixed_goal = fixed_goal
        self.reset_p = reset_p
        self.goal_p = goal_p

        self.test_mode_case_num = test_mode_case_num
        if self.test_mode_case_num == 1:
            self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
            self.fixed_goal = np.array([0.0, 0.50, 0.10, 0.0, 0.70, 0.03, 0.0, 0.70, 0.015])
        elif self.test_mode_case_num == 2:
            self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
            self.fixed_goal = np.array([0.0, 0.70, 0.10, 0.0, 0.50, 0.015, 0.0, 0.50, 0.03])
        elif self.test_mode_case_num == 3:
            self.fixed_reset = np.array([0.0, 0.60, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
            self.fixed_goal = np.array([0.0, 0.60, 0.05, 0.0, 0.70, 0.015, 0.0, 0.50, 0.015])

        if presampled_goals is not None:
            self.reset()
        else:
            self.num_goals_presampled = 1
            self.reset()
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
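
num_goals_presampled is derived from the first entry of _presampled_goals, so that argument is expected to be a dict of equally long arrays keyed like the observation Dict. An illustrative sketch (the keys follow the Dict above; the dimensions are assumptions):

import numpy as np

presampled_goals = {
    'state_desired_goal': np.zeros((1000, 7)),
    'proprio_desired_goal': np.zeros((1000, 3)),
}
num_goals_presampled = len(list(presampled_goals.values())[0])  # 1000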
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.07),
            hand_high=(0.5, 1, 0.5),
            obj_low=None,
            obj_high=None,
            random_init=False,
            obs_type='plain',
            tasks=[{'goal': np.array([0.12, 0.7, 0.02]), 'obj_init_pos': np.array([-0.12, 0.7, 0.02]), 'obj_init_angle': 0.3}],
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.1,
            rewMode='orig',
            rotMode='fixed',
            multitask=False,
            multitask_num=1,
            if_render=False,
            **kwargs
    ):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(
            self,
            frame_skip=5,
            action_scale=1./100,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        assert obs_type in OBS_TYPE
        if multitask:
            obs_type = 'with_goal_and_id'
        self.obs_type = obs_type
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high
        
        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.liftThresh = liftThresh
        self.max_path_length = 150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        self.multitask_num = multitask_num
        self._state_goal_idx = np.zeros(self.multitask_num)
        self.if_render = if_render
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
                np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
            )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.goal_and_obj_space = Box(
            np.hstack((goal_low[:2], obj_low[:2])),
            np.hstack((goal_high[:2], obj_high[:2])),
        )
        self.goal_space = Box(goal_low[:2], goal_high[:2])
        if not multitask and self.obs_type == 'with_goal_id':
            self.observation_space = Box(
                    np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
                    np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
            )
        elif not multitask and self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low,)),
                np.hstack((self.hand_high, obj_high,)),
            )
        elif not multitask and self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                    np.hstack((self.hand_low, obj_low, goal_low, np.zeros(multitask_num))),
                    np.hstack((self.hand_high, obj_high, goal_high, np.ones(multitask_num))),
            )
        # task = self.sample_task()
        # self._state_goal = np.array(task['goal'])
        # self.obj_init_pos = self.adjust_initObjPos(task['obj_init_pos'])
        # self.obj_init_angle = task['obj_init_angle']
        # self.objHeight = self.data.get_geom_xpos('objGeom')[2]
        # self.heightTarget = self.objHeight + self.liftThresh
        # self.curr_path_length = 0
        # self.maxPlacingDist = np.linalg.norm(np.array([self.obj_init_pos[0], self.obj_init_pos[1]]) - np.array(self._state_goal)) + self.heightTarget
        self.reset()
    def __init__(self,
                 puck_low=(-.4, .2),
                 puck_high=(.4, 1),
                 reward_type='state_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 hand_low=(-0.28, 0.3, 0.05),
                 hand_high=(0.28, 0.9, 0.3),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
                 goal_low=(-0.25, 0.3, 0.02, -.2, .4),
                 goal_high=(0.25, 0.875, 0.02, .2, .8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 init_hand_xyz=(0, 0.4, 0.07),
                 reset_free=False,
                 xml_path='sawyer_xyz/sawyer_push_puck.xml',
                 clamp_puck_on_step=False,
                 puck_radius=.07,
                 **kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                       np.hstack((self.hand_high, puck_high)),
                                       dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = init_puck_z
        self.init_hand_xyz = np.array(init_hand_xyz)
        self._set_puck_xy(self.sample_puck_xy())
        self.reset_free = reset_free
        self.reset_counter = 0
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.clamp_puck_on_step = clamp_puck_on_step
        self.puck_radius = puck_radius
        self.reset()
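
A sketch of what clamp_puck_on_step implies: after each simulation step the puck xy would be clipped back into puck_space. The helper clamp_puck_xy is hypothetical:

import numpy as np

def clamp_puck_xy(puck_xy, puck_low=(-.4, .2), puck_high=(.4, 1)):
    # Clip the puck back inside its Box after a simulation step.
    return np.clip(puck_xy, puck_low, puck_high)

print(clamp_puck_xy(np.array([0.55, 0.1])))  # -> [0.4 0.2]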
# Example 15
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            tasks=[{'goal': [0, 0.7, 0.02], 'obj_init_pos': [0, 0.6, 0.02]}],
            #tasks = None,
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.4, 0.05),
            rewMode='posPlace',
            indicatorDist=0.05,
            image=False,
            image_dim=84,
            camera_name='robotview_zoomed',
            mpl=150,
            hide_goal=True,
            hand_type='parallel_v1',
            n_tasks=15,
            **kwargs
    ):
        self.quick_init(locals())
        self.hand_type = hand_type
        SawyerXYZEnv.__init__(
            self,
            hand_type = self.hand_type,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high
        
        if goal_high is None:
            goal_high = self.hand_high

        self.camera_name = camera_name
        #self.objHeight = self.model.body_pos[-1][2]
        #assert self.objHeight != 0
        self.max_path_length = mpl
        self.image = image

        self.image_dim = image_dim
        self.task_temp = np.array(tasks)
        self.tasks = self.sample_tasks(n_tasks)
        self.num_tasks = len(self.tasks)
        self.rewMode = rewMode
        self.Ind = indicatorDist
        self.hand_init_pos = np.array(hand_init_pos)
        self.action_space = Box(
            np.array([-1, -1, -1]),
            np.array([1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.goal_space = Box(goal_low, goal_high)
        #self.initialize_camera(sawyer_pusher_cam)
        self.info_logKeys = ['placeDist']
        self.hide_goal = hide_goal
        if self.image:
            self.set_image_obsSpace()

        else:
            self.set_state_obsSpace()
# Example 16
    def __init__(
            self,
            obj_low=(-0.1, 0.35, 0.05),
            obj_high=(0.1, 0.55, 0.05),
            reward_type='dense',
            indicator_threshold=0.06,
            random_init=True,

            # hand_low=(-0.5, 0.40, 0.05),
            # hand_high=(0.5, 1, 0.5),
            hand_low=(-0.7, 0.30, 0.05),
            hand_high=(0.7, 1, 0.5),
            goal_low=None,
            goal_high=None,
            reset_free=False,
            num_beads=7,
            init_pos=(0., 0.4, 0.02),
            substeps=50,
            log_video=False,
            video_substeps=5,
            video_h=500,
            video_w=500,
            camera_name='leftcam',
            sparse=False,
            action_penalty_const=1e-2,
            rotMode='fixed',  #'fixed',
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        model = rope(num_beads=num_beads, init_pos=init_pos, texture=True)
        import os
        model.save(os.path.expanduser('~/sawyer_rope.xml'))
        with model.asfile() as f:
            SawyerXYZEnv.__init__(self,
                                  model_name=f.name,
                                  frame_skip=5,
                                  **kwargs)
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        self.reset_free = reset_free

        #sim params
        self.num_beads = num_beads
        self.init_pos = init_pos
        self.substeps = substeps  # number of intermediate positions to generate

        #reward params
        self.sparse = sparse
        self.action_penalty_const = action_penalty_const

        #video params
        self.log_video = log_video
        self.video_substeps = video_substeps
        self.video_h = video_h
        self.video_w = video_w
        self.camera_name = camera_name

        self._state_goal = np.asarray([1.0, 0.0])
        self.rotMode = rotMode
        self.random_init = random_init
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 10
            self.action_space = Box(
                np.array([-1, -1, -1, -1, -1]),
                np.array([1, 1, 1, 1, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )

        self.obj_and_goal_space = Box(np.array(obj_low),
                                      np.array(obj_high),
                                      dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)

        self.observation_space = Box(np.hstack(
            (self.hand_low, list(obj_low) * self.num_beads)),
                                     np.hstack(
                                         (self.hand_high,
                                          list(obj_high) * self.num_beads)),
                                     dtype=np.float32)

        self.hand_reset_pos = np.array([0, .6, .2])
        self.reset()
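
The rope observation space tiles the per-bead bounds, so the flat observation is the hand xyz plus three coordinates per bead. A quick check of that arithmetic with the defaults above:

obj_low = (-0.1, 0.35, 0.05)
num_beads = 7
obs_low = list(obj_low) * num_beads  # per-bead bounds, tiled
print(3 + len(obs_low))  # 24 = hand xyz + 3 coordinates per bead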
    def __init__(self,
                 puck_low=(-.4, .2),
                 puck_high=(.4, 1),
                 reward_type='state_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 hand_low=(-0.28, 0.3, 0.05),
                 hand_high=(0.28, 0.9, 0.3),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
                 goal_low=(-0.25, 0.3, 0.02, -.2, .4, -.2, .4),
                 goal_high=(0.25, 0.875, 0.02, .2, .8, .2, .8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 num_resets_before_puck_reset=1,
                 num_resets_before_hand_reset=1,
                 always_start_on_same_side=True,
                 goal_always_on_same_side=True,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.hand_and_two_puck_space = Box(np.hstack(
            (self.hand_low, puck_low, puck_low)),
                                           np.hstack((self.hand_high,
                                                      puck_high, puck_high)),
                                           dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_two_puck_space),
            ('desired_goal', self.hand_and_two_puck_space),
            ('achieved_goal', self.hand_and_two_puck_space),
            ('state_observation', self.hand_and_two_puck_space),
            ('state_desired_goal', self.hand_and_two_puck_space),
            ('state_achieved_goal', self.hand_and_two_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = init_puck_z
        self.reset_counter = 0
        self.num_resets_before_puck_reset = num_resets_before_puck_reset
        self.num_resets_before_hand_reset = num_resets_before_hand_reset
        self._always_start_on_same_side = always_start_on_same_side
        self._goal_always_on_same_side = goal_always_on_same_side

        self._set_puck_xys(self._sample_puck_xys())
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.05),
            hand_high=(0.5, 1, 0.5),
            obj_low=(-0.1, 0.6, 0.02),
            obj_high=(0.1, 0.7, 0.02),
            random_init=False,
            tasks=[{'goal': np.array([-0.1, 0.8, 0.2]), 'obj_init_pos': np.array([0, 0.6, 0.02]), 'obj_init_angle': 0.3}],
            goal_low=(-0.1, 0.8, 0.05),
            goal_high=(0.1, 0.9, 0.3),
            hand_init_pos=(0, 0.6, 0.2),
            sampleMode='equal',
            rewMode='orig',
            rotMode='fixed',
            multitask=False,
            multitask_num=1,
            **kwargs
    ):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(
            self,
            frame_skip=5,
            action_scale=1./100,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high
        
        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.sampleMode = sampleMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1./50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
                np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.hstack((obj_low, goal_low)),
            np.hstack((obj_high, goal_high)),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        if not multitask:
            self.observation_space = Box(
                    np.hstack((self.hand_low, goal_low)),
                    np.hstack((self.hand_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                    np.hstack((self.hand_low, obj_low, np.zeros(multitask_num))),
                    np.hstack((self.hand_high, obj_high, np.ones(multitask_num))),
            )
        self.num_resets = 0
        self.reset()
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            tasks=[{
                'goal': np.array([0, 0.7, 0.02]),
                'height': 0.06,
                'obj_init_pos': np.array([0, 0.6, 0.02])
            }],
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.4, 0.05),
            #hand_init_pos = (0, 0.5, 0.35) ,
            liftThresh=0.04,
            rewMode='orig',
            objType='block',
            **kwargs):

        self.objType = objType
        self.quick_init(locals())

        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        #Make sure that objectGeom is the last geom in the XML
        self.objHeight = self.model.geom_pos[-1][2]
        self.heightTarget = self.objHeight + liftThresh

        self.max_path_length = 150

        self.tasks = tasks
        self.num_tasks = len(tasks)

        self.rewMode = rewMode

        # defaultTask = tasks[0]

        # self.obj_init_pos = np.array(defaultTask['obj_init_pos'])
        self.hand_init_pos = np.array(hand_init_pos)

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )

        self.goal_space = Box(goal_low, goal_high)

        self.observation_space = Dict([
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space),
        ])
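
The liftThresh bookkeeping used here and in the other pick-and-place constructors reduces to simple arithmetic; for the default above (objHeight is an example value, since the real one is read from the model):

objHeight = 0.02       # rest height of the object geom (example value)
liftThresh = 0.04      # constructor default
heightTarget = objHeight + liftThresh  # 0.06: a lift counts once the object z passes this
print(heightTarget)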
# Example 20
    def __init__(
            self,
            hand_low=(-0.5, 0.40, 0.05),
            hand_high=(0.5, 1, 0.5),
            obj_low=(-0.1, 0.7, 0.16),
            obj_high=(0.1, 0.9, 0.16),
            random_init=False,
            # tasks = [{'goal': 0.55,  'obj_init_pos':0.0, 'obj_init_angle': 0.3}],
            # tasks = [{'goal': np.array([0.1, 0.785, 0.15]),  'obj_init_pos':np.array([-0.1, 0.785, 0.15]), 'obj_init_angle': 0.3}],
            tasks=[{
                'goal': np.array([0.08, 0.785, 0.15]),
                'obj_init_pos': np.array([-0.1, 0.785, 0.15]),
                'obj_init_angle': 0.3
            }],
            goal_low=None,
            goal_high=None,
            obs_type='plain',
            hand_init_pos=(0, 0.6, 0.2),
            liftThresh=0.02,
            rewMode='orig',
            rotMode='fixed',
            multitask=False,
            multitask_num=1,
            if_render=False,
            **kwargs):
        self.quick_init(locals())
        SawyerXYZEnv.__init__(self,
                              frame_skip=5,
                              action_scale=1. / 100,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)

        assert obs_type in OBS_TYPE
        if multitask:
            obs_type = 'with_goal_and_id'
        self.obs_type = obs_type

        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.random_init = random_init
        self.max_path_length = 150
        self.tasks = tasks
        self.num_tasks = len(tasks)
        self.rewMode = rewMode
        self.rotMode = rotMode
        self.hand_init_pos = np.array(hand_init_pos)
        self.multitask = multitask
        self.multitask_num = multitask_num
        self._state_goal_idx = np.zeros(self.multitask_num)
        self.if_render = if_render
        self.liftThresh = liftThresh
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 50
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi, -1]),
                np.array([1, 1, 1, np.pi, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )
        self.obj_and_goal_space = Box(
            np.array(obj_low),
            np.array(obj_high),
        )
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))

        if not multitask and self.obs_type == 'with_goal_id':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
                np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
            )
        elif not multitask and self.obs_type == 'plain':
            self.observation_space = Box(
                np.hstack((
                    self.hand_low,
                    obj_low,
                )),
                np.hstack((
                    self.hand_high,
                    obj_high,
                )),
            )
        elif not multitask and self.obs_type == 'with_goal':
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low)),
                np.hstack((self.hand_high, obj_high, goal_high)),
            )
        else:
            self.observation_space = Box(
                np.hstack((self.hand_low, obj_low, goal_low,
                           np.zeros(multitask_num))),
                np.hstack((self.hand_high, obj_high, goal_high,
                           np.ones(multitask_num))),
            )
        self.reset()
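Only the rotMode branches above change the action interface; everything else in this constructor is bookkeeping. A side-by-side restatement (illustrative only; it mirrors the bounds above rather than coming from the source) makes the four layouts easier to compare:

import numpy as np
from gym.spaces import Box

def action_space_for(rot_mode):
    # Restates the four rotMode branches in one place.
    if rot_mode == 'fixed':      # xyz delta + gripper
        low, high = [-1, -1, -1, -1], [1, 1, 1, 1]
    elif rot_mode == 'rotz':     # xyz + z rotation + gripper
        low, high = [-1, -1, -1, -np.pi, -1], [1, 1, 1, np.pi, 1]
    elif rot_mode == 'quat':     # xyz + (angle, axis) rotation + gripper
        low = [-1, -1, -1, 0, -1, -1, -1, -1]
        high = [1, 1, 1, 2 * np.pi, 1, 1, 1, 1]
    else:                        # xyz + Euler angles + gripper
        low = [-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]
        high = [1, 1, 1, np.pi / 2, np.pi / 2, 2 * np.pi, 1]
    return Box(np.array(low), np.array(high), dtype=np.float32)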
    def __init__(
            self,
            obj_low=None,
            obj_high=None,
            tasks=[{'goal': [0, 0.7, 0.02], 'obj_init_pos': [0, 0.6, 0.02]}],
            goal_low=None,
            goal_high=None,
            hand_init_pos=(0, 0.4, 0.05),
            rewMode='posPlace',
            **kwargs):
        self.quick_init(locals())

        SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
        if obj_low is None:
            obj_low = self.hand_low

        if goal_low is None:
            goal_low = self.hand_low

        if obj_high is None:
            obj_high = self.hand_high

        if goal_high is None:
            goal_high = self.hand_high

        self.max_path_length = 150

        self.tasks = tasks
        self.num_tasks = len(tasks)

        self.hand_init_pos = np.array(hand_init_pos)

        self.action_space = Box(
            np.array([-1, -1, -1, -1, -1]),
            np.array([1, 1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )

        self.goal_space = Box(goal_low, goal_high)

        self.observation_space = Dict([
            ('state_observation', self.hand_and_obj_space),
            ('desired_goal', self.goal_space)
        ])

        self.reset()
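This excerpt never shows how the tasks list is consumed; presumably a reset-time helper picks one entry and reads out its fields. A hedged sketch of that convention (sample_task is hypothetical, not part of the source):

import numpy as np

tasks = [{'goal': [0, 0.7, 0.02], 'obj_init_pos': [0, 0.6, 0.02]}]

def sample_task(tasks):
    # Pick one task dict at random and return its goal and start pose.
    task = tasks[np.random.randint(len(tasks))]
    return np.array(task['goal']), np.array(task['obj_init_pos'])

goal, obj_init_pos = sample_task(tasks)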
Example #22
    def __init__(self,
                 goal_low=(0, ),
                 goal_high=(1.0472, ),
                 action_reward_scale=0,
                 reward_type='angle_difference',
                 indicator_threshold=(.02, .03),
                 fix_goal=False,
                 fixed_goal=(-.25,),
                 reset_free=False,
                 fixed_hand_z=0.12,
                 hand_low=(-0.1, 0.45, 0.15),
                 hand_high=(0., 0.65, .225),
                 target_pos_scale=1,
                 target_angle_scale=1,
                 min_angle=0,
                 max_angle=1.0472,
                 xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
                 **sawyer_xyz_kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(self,
                              self.model_name,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              **sawyer_xyz_kwargs)
        MultitaskEnv.__init__(self)
        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low),
                              np.array(goal_high),
                              dtype=np.float32)
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
            dtype=np.float32,
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        # Ensure the env does not start in weird positions.
        self.reset_free = True
        self.reset()
        self.reset_free = reset_free
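reward_type='angle_difference' together with target_angle_scale and action_reward_scale suggests a reward of roughly the following shape. The real computation lives in the env's reward method, which is not shown here, so treat this as a hedged sketch only:

import numpy as np

def angle_difference_reward(door_angle, goal_angle, action=None,
                            target_angle_scale=1.0, action_reward_scale=0.0):
    # Penalize the absolute door-angle error; optionally regularize the action.
    reward = -np.abs(door_angle - goal_angle) * target_angle_scale
    if action is not None:
        reward -= action_reward_scale * np.linalg.norm(action)
    return reward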
    def __init__(
            self,
            filename='sawyer_multiobj.xml',
            mode_rel=np.array([True, True, True, True, False]),
            num_objects=3,
            object_mass=1,
            friction=1.0,
            finger_sensors=True,
            maxlen=0.12,
            minlen=0.01,
            preload_obj_dict=None,
            object_meshes=[
                'Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl',
                'RuggedBowl'
            ],
            obj_classname=None,
            block_height=0.02,
            block_width=0.02,
            cylinder_radius=0.05,
            viewer_image_height=84,
            viewer_image_width=84,
            skip_first=100,
            substeps=100,
            init_hand_xyz=(-0.00031741, 0.24092109, 0.08816302),
            randomize_initial_pos=False,
            state_goal=None,
            randomize_goal_at_reset=True,
            hand_z_position=0.089,
            fix_z=True,
            fix_gripper=True,
            fix_rotation=True,
            fix_reset_pos=True,
            do_render=False,
            match_orientation=False,
            workspace_low=np.array([-0.31, 0.24, 0]),
            workspace_high=np.array([0.32, 0.7, 0.14]),
            hand_low=np.array([-0.31, 0.24, 0.0]),
            hand_high=np.array([0.32, 0.7, 0.14]),
            env_seed=0,
    ):
        self.quick_init(locals())

        self.seed(env_seed)

        base_filename = asset_base_path + filename

        friction_params = (friction, 0.1, 0.02)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius)

        gen_xml = create_root_xml(base_filename)
        SawyerXYZEnv.__init__(
            self,
            model_name=gen_xml,
            mocap_low=hand_low,
            mocap_high=hand_high,
        )
        clean_xml(gen_xml)

        if self.sim.model.nmocap > 0 and self.sim.model.eq_data is not None:
            for i in range(self.sim.model.eq_data.shape[0]):
                if self.sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                    # Define the xyz + quat of the mocap relative to the hand
                    self.sim.model.eq_data[i, :] = np.array(
                        [0., 0., 0., 1., 0., 0., 0.])

        self._base_sdim, self._base_adim, self.mode_rel = 5, 5, mode_rel
        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)
        self.workspace_low = np.array(workspace_low)
        self.workspace_high = np.array(workspace_high)
        self.object_low = self.workspace_low.copy()
        self.object_high = self.workspace_high.copy()
        self.object_low[:2] += cylinder_radius
        self.object_high[:2] -= cylinder_radius
        self.action_scale = 5 / 100
        self.num_objects, self.skip_first, self.substeps = num_objects, skip_first, substeps
        self.randomize_initial_pos = randomize_initial_pos
        self.finger_sensors, self._maxlen = finger_sensors, maxlen
        self._threshold = 0
        self._previous_target_qpos, self._n_joints = None, 7
        self._object_pos = np.zeros((num_objects, 7))
        self._reset_xyz = np.zeros((num_objects, 3))
        self._reset_quat = np.zeros((num_objects, 4))
        self._reset_pos = np.copy(self._object_pos)
        self._object_names = ['obj_' + str(i) for i in range(num_objects)]
        self._initialized = False
        self._state_goal = state_goal
        self.hand_z_position = hand_z_position
        self.fix_z = fix_z
        self.fix_rotation = fix_rotation
        self.fix_gripper = fix_gripper
        self._randomize_goal_at_reset = randomize_goal_at_reset
        self.do_render = do_render
        self.init_hand_xyz = np.array(init_hand_xyz)
        self.match_orientation = match_orientation
        self._object_reset_pos = self.sample_object_positions()
        self.goal_dim_per_object = 7 if match_orientation else 3
        self.cylinder_radius = cylinder_radius

        action_bounds = [1, 1]  # x and y are always actuated
        if not fix_z:
            action_bounds.append(1)
        if not fix_gripper:
            action_bounds.append(1)
        if not fix_rotation:
            action_bounds.append(1)
        action_bounds = np.array(action_bounds)
        self.action_space = Box(-action_bounds,
                                action_bounds,
                                dtype=np.float32)
        # Action space is 2- to 5-dim depending on the fix_* flags: x, y[, z][, gripper][, rotation].

        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        obs_dim = 3 * num_objects + 3
        obs_space = Box(np.array([-1] * obs_dim),
                        np.array([1] * obs_dim),
                        dtype=np.float32)
        goal_space = Box(
            np.array([-1] * self.goal_dim_per_object * num_objects),
            np.array([1] * self.goal_dim_per_object * num_objects),
            dtype=np.float32,
        )
        self.observation_space = Dict([
            ('observation', obs_space),
            ('desired_goal', goal_space),
            ('achieved_goal', goal_space),
            ('state_observation', obs_space),
            ('state_desired_goal', goal_space),
            ('state_achieved_goal', goal_space),
        ])

        self._state_goal = self.sample_goal()
        self.fix_reset_pos = fix_reset_pos
        self.reset()
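Since goal_dim_per_object is 7 with orientation matching (xyz + wxyz quaternion) and 3 without, the flat goal vector has goal_dim_per_object * num_objects entries. A small sketch of how such a vector splits back into per-object targets (the variable names are illustrative):

import numpy as np

num_objects = 3
match_orientation = False
goal_dim_per_object = 7 if match_orientation else 3   # xyz (+ wxyz quat)

flat_goal = np.zeros(goal_dim_per_object * num_objects)
per_object_goals = flat_goal.reshape(num_objects, goal_dim_per_object)
positions = per_object_goals[:, :3]    # xyz target for each object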
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,

            obj_init_positions=((0, 0.6, 0.02),),
            random_init=False,

            fix_goal=False,
            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,
            reset_free=False,

            hide_goal_markers=False,
            oracle_reset_prob=0.0,
            presampled_goals=None,
            num_goals_presampled=1000,
            p_obj_in_hand=.75,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.random_init = random_init
        self.p_obj_in_hand = p_obj_in_hand
        self.indicator_threshold = indicator_threshold

        self.obj_init_z = obj_init_positions[0][2]
        self.obj_init_positions = np.array(obj_init_positions)
        self.last_obj_pos = self.obj_init_positions[0]

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.reset_free = reset_free
        self.oracle_reset_prob = oracle_reset_prob

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack(([0.0], self.hand_low, obj_low)),
            np.hstack(([0.04], self.hand_high, obj_high)),
            dtype=np.float32
        )

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.hand_reset_pos = np.array([0, .6, .2])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'
        self.reset()
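The presampled_goals branch assumes a dict whose values are equally long arrays, which is why the count is read as len(list(...values())[0]). A minimal sketch of sampling one goal from such a cache (the keys and shapes here are hypothetical):

import numpy as np

presampled_goals = {
    'state_desired_goal': np.zeros((1000, 6)),
    'desired_goal': np.zeros((1000, 6)),
}
num_goals_presampled = len(list(presampled_goals.values())[0])   # 1000
idx = np.random.randint(num_goals_presampled)
goal = {key: goals[idx] for key, goals in presampled_goals.items()}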
    def __init__(self,
                 reward_type='puck_distance',
                 norm_order=2,
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.0, 0.6, 0.02, -0.15, 0.6),
                 puck_low=None,
                 puck_high=None,
                 hand_low=(-0.25, 0.3, 0.02),
                 hand_high=(0.25, .8, .02),
                 goal_low=(-0.25, 0.3),
                 goal_high=(0.25, 0.8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 reset_free=False,
                 init_puck_pos=(0.0, .65),
                 mode='train',
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.array(puck_low)
        if goal_high is None:
            goal_high = np.array(puck_high)

        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
        )
        self.puck_space = Box(puck_low, puck_high)
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.puck_space),
            ('achieved_goal', self.puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.puck_space),
            ('state_achieved_goal', self.puck_space),
        ])
        self.init_puck_z = init_puck_z
        self.reset_free = reset_free
        self.puck_pos = self.get_puck_pos()[:2]
        self.mode(mode)
        self.init_puck_pos = np.array(init_puck_pos)