Example #1
    def __init__(self,
                 puck_low=(-.4, .2),
                 puck_high=(.4, 1),
                 reward_type='state_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 hand_low=(-0.28, 0.3, 0.05),
                 hand_high=(0.28, 0.9, 0.3),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
                 goal_low=(-0.25, 0.3, 0.02, -.2, .4, -.2, .4),
                 goal_high=(0.25, 0.875, 0.02, .2, .8, .2, .8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 num_resets_before_puck_reset=1,
                 num_resets_before_hand_reset=1,
                 always_start_on_same_side=True,
                 goal_always_on_same_side=True,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_two_puck_space = Box(
            np.hstack((self.hand_low, puck_low, puck_low)),
            np.hstack((self.hand_high, puck_high, puck_high)),
        )
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.puck_space = Box(self.puck_low, self.puck_high)
        self.observation_space = Dict([
            ('observation', self.hand_and_two_puck_space),
            ('desired_goal', self.hand_and_two_puck_space),
            ('achieved_goal', self.hand_and_two_puck_space),
            ('state_observation', self.hand_and_two_puck_space),
            ('state_desired_goal', self.hand_and_two_puck_space),
            ('state_achieved_goal', self.hand_and_two_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = init_puck_z
        self.reset_counter = 0
        self.num_resets_before_puck_reset = num_resets_before_puck_reset
        self.num_resets_before_hand_reset = num_resets_before_hand_reset
        self._always_start_on_same_side = always_start_on_same_side
        self._goal_always_on_same_side = goal_always_on_same_side

        self._set_puck_xys(self._sample_puck_xys())
Example #2
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,

            obj_init_positions=((0, 0.6, 0.02),),
            random_init=False,

            fix_goal=False,
            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,
            reset_free=False,

            hide_goal_markers=False,
            oracle_reset_prob=0.0,
            presampled_goals=None,
            num_goals_presampled=1000,
            p_obj_in_hand=.75,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.random_init = random_init
        self.p_obj_in_hand = p_obj_in_hand
        self.indicator_threshold = indicator_threshold

        self.obj_init_z = obj_init_positions[0][2]
        self.obj_init_positions = np.array(obj_init_positions)
        self.last_obj_pos = self.obj_init_positions[0]

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.reset_free = reset_free
        self.oracle_reset_prob = oracle_reset_prob

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack(([0.0], self.hand_low, obj_low)),
            np.hstack(([0.04], self.hand_high, obj_high)),
            dtype=np.float32
        )

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.hand_reset_pos = np.array([0, .6, .2])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'
        self.reset()
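
The excerpts on this page only show __init__; the surrounding class definitions, reset, and step are not included. A minimal sketch of how such a goal-conditioned env is typically driven, assuming the standard gym reset/step API and the Dict observation keys set up in the constructor above:

def rollout_once(env, horizon=10):
    """Roll out random actions in one of the envs constructed above.

    Assumes a gym-style reset/step API and the Dict observation_space keys
    ('observation', 'desired_goal', 'achieved_goal') defined in __init__.
    """
    obs = env.reset()
    total_reward = 0.0
    for _ in range(horizon):
        action = env.action_space.sample()          # e.g. 4-dim: xyz delta + gripper
        obs, reward, done, info = env.step(action)  # standard gym step (assumed)
        total_reward += reward
        if done:
            break
    return obs['achieved_goal'], obs['desired_goal'], total_reward
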
Example #3
    def __init__(self,
                 puck_low=(-.4, .2),
                 puck_high=(.4, 1),
                 reward_type='state_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 hand_low=(-0.28, 0.3, 0.05),
                 hand_high=(0.28, 0.9, 0.3),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
                 goal_low=(-0.25, 0.3, 0.02, -.2, .4),
                 goal_high=(0.25, 0.875, 0.02, .2, .8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 init_hand_xyz=(0, 0.4, 0.07),
                 reset_free=False,
                 xml_path='sawyer_xyz/sawyer_push_puck.xml',
                 clamp_puck_on_step=False,
                 puck_radius=.07,
                 max_steps=250,
                 **kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                       np.hstack((self.hand_high, puck_high)),
                                       dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = init_puck_z
        self.init_hand_xyz = np.array(init_hand_xyz)
        self._set_puck_xy(self.sample_puck_xy())
        self.reset_free = reset_free
        self.reset_counter = 0
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.clamp_puck_on_step = clamp_puck_on_step
        self.puck_radius = puck_radius

        self.max_steps = max_steps
        self._current_steps = 0

        self.reset()
Example #4
    def __init__(self,
                 reward_type='puck_distance',
                 norm_order=2,
                 indicator_threshold=0.06,
                 fix_goal=False,
                 fixed_goal=(0.0, 0.6, 0.02, -0.15, 0.6),
                 puck_low=None,
                 puck_high=None,
                 hand_low=(-0.25, 0.3, 0.02),
                 hand_high=(0.25, .8, .02),
                 goal_low=(-0.25, 0.3),
                 goal_high=(0.25, 0.8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 reset_free=False,
                 init_puck_pos=(0.0, .65),
                 mode='train',
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.array(puck_low)
        if goal_high is None:
            goal_high = np.array(puck_high)

        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
        )
        self.puck_space = Box(puck_low, puck_high)
        self.hand_space = Box(self.hand_low, self.hand_high)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.puck_space),
            ('achieved_goal', self.puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.puck_space),
            ('state_achieved_goal', self.puck_space),
        ])
        self.init_puck_z = init_puck_z
        self.reset_free = reset_free
        self.puck_pos = self.get_puck_pos()[:2]
        self.mode(mode)
        self.init_puck_pos = np.array(init_puck_pos)
Example #5
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            hand_low=(-0.1, 0.55, 0.05),
            hand_high=(0.05, 0.65, 0.2),

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            frame_skip=50,
            obj_init_positions=((0, 0.6, 0.02),),
            random_init=False,

            num_objects=1,
            fix_goal=False,

            mocap_low=(-0.13, 0.57, 0.04),
            mocap_high=(0.08, 0.73, 0.2),
            action_scale=0.02,

            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,
            reset_free=False,
            filename='sawyer_pick_and_place_multiobj.xml',
            object_mass=1,
            # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
            object_meshes=None,
            obj_classname=None,
            block_height=0.04,
            block_width=0.04,
            cylinder_radius=0.015,
            finger_sensors=False,
            maxlen=0.06,
            minlen=0.01,
            preload_obj_dict=None,
            hide_goal_markers=False,
            oracle_reset_prob=0.0,
            presampled_goals=None,
            num_goals_presampled=2,
            p_obj_in_hand=1,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)


        base_filename = asset_base_path + filename
        friction_params = (1, 1, 2)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius)

        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        self.reset_mocap_welds()

        clean_xml(gen_xml)

        self.mocap_low = mocap_low
        self.mocap_high = mocap_high
        self.action_scale = action_scale


        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.random_init = random_init
        self.p_obj_in_hand = p_obj_in_hand
        self.indicator_threshold = indicator_threshold

        self.obj_init_z = obj_init_positions[0][2]
        self.obj_init_positions = np.array(obj_init_positions)
        self.last_obj_pos = self.obj_init_positions[0]

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.reset_free = reset_free
        self.oracle_reset_prob = oracle_reset_prob

        self.obj_ind_to_manip = 0

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack(([0.0], self.hand_low, obj_low)),
            np.hstack(([0.04], self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.num_objects = num_objects
        self.maxlen = maxlen

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.hand_reset_pos = np.array([0, .6, .2])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.num_goals_presampled = 10
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'
        self.reset()
Example #6
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,

            obj_init_pos=(0, 0.6, 0.02),

            fix_goal=False,
            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,

            hide_goal_markers=False,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            model_name=self.model_name,
            **kwargs
        )
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.obj_init_pos = np.array(obj_init_pos)

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
        self.observation_space = Dict([
            ('observation', self.hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
        ])
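
Many of the constructors on this page share the fix_goal / fixed_goal / goal_low / goal_high fields. A minimal sketch of the goal-selection convention they set up (the real classes implement this in their own goal-sampling methods, which are not shown here; the bounds below are illustrative, not defaults of any specific class):

import numpy as np

def sample_state_goal(fix_goal, fixed_goal, goal_low, goal_high, rng=np.random):
    # Return the fixed goal when fix_goal is set; otherwise sample uniformly
    # from the box [goal_low, goal_high] assembled in the constructors above.
    if fix_goal:
        return np.array(fixed_goal, dtype=np.float64)
    return rng.uniform(goal_low, goal_high)

# Illustrative bounds in the hand(3) + obj(3) goal layout used just above:
goal = sample_state_goal(
    fix_goal=False,
    fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
    goal_low=np.array([-0.28, 0.3, 0.05, -0.28, 0.3, 0.05]),
    goal_high=np.array([0.28, 0.9, 0.3, 0.28, 0.9, 0.3]),
)
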
Example #7
    def __init__(self,
                 goal_low=None,
                 goal_high=None,
                 action_reward_scale=0,
                 reward_type='angle_difference',
                 indicator_threshold=(.02, .03),
                 fix_goal=False,
                 fixed_goal=(0, .45, .12, -.25),
                 reset_free=False,
                 fixed_hand_z=0.12,
                 hand_low=(-0.25, 0.3, .12),
                 hand_high=(0.25, 0.6, .12),
                 target_pos_scale=1,
                 target_angle_scale=1,
                 min_angle=-1.5708,
                 max_angle=0,
                 xml_path='sawyer_xyz/sawyer_door_pull.xml',
                 **sawyer_xyz_kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(self,
                              self.model_name,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              **sawyer_xyz_kwargs)
        MultitaskEnv.__init__(self)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
            dtype=np.float32,
        )
        if goal_low is None:
            goal_low = self.state_space.low
        if goal_high is None:
            goal_high = self.state_space.high
        self.goal_space = Box(
            np.array(goal_low),
            np.array(goal_high),
            dtype=np.float32,
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.state_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        self.reset()
Example #8
    def __init__(
            self,
            reward_type='state_distance',
            norm_order=2,
            action_scale=20,
            frame_skip=3,
            two_frames=False,
            vel_in_state=True,
            z_in_state=True,
            car_low=(-1.90, -1.90),  #list([-1.60, -1.60]),
            car_high=(1.90, 1.90),  #list([1.60, 1.60]),
            goal_low=(-1.90, -1.90),  #list([-1.60, -1.60]),
            goal_high=(1.90, 1.90),  #list([1.60, 1.60]),
            model_path='wheeled_car.xml',
            reset_low=None,
            reset_high=None,
            *args,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path('locomotion/' +
                                                          model_path),
                           frame_skip=frame_skip,
                           **kwargs)

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.action_scale = action_scale

        # set radius
        if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
            self.car_radius = np.sqrt(2) * 0.34
        elif model_path == 'wheeled_car_old.xml':
            self.car_radius = 0.30
        else:
            raise NotImplementedError

        # set walls
        if model_path == 'wheeled_car_u_wall.xml':
            self.walls = [
                Wall(0, -0.5, 0.75, 0.05, self.car_radius),
                Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
                Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
            ]
        else:
            self.walls = []

        self.reward_type = reward_type
        self.norm_order = norm_order

        self.car_low, self.car_high = np.array(car_low), np.array(car_high)
        self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)

        if reset_low is None:
            self.reset_low = np.array(car_low)
        else:
            self.reset_low = np.array(reset_low)
        if reset_high is None:
            self.reset_high = np.array(car_high)
        else:
            self.reset_high = np.array(reset_high)

        self.car_low += self.car_radius
        self.car_high -= self.car_radius
        self.goal_low += self.car_radius
        self.goal_high -= self.car_radius
        self.reset_low += self.car_radius
        self.reset_high -= self.car_radius

        self.two_frames = two_frames
        self.vel_in_state = vel_in_state
        self.z_in_state = z_in_state

        # x and y pos
        obs_space_low = np.copy(self.car_low)
        obs_space_high = np.copy(self.car_high)
        goal_space_low = np.copy(self.goal_low)
        goal_space_high = np.copy(self.goal_high)

        # z pos
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-1]))
            obs_space_high = np.concatenate((obs_space_high, [0.03]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        # sin and cos
        obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
        obs_space_high = np.concatenate((obs_space_high, [1, 1]))
        goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
        goal_space_high = np.concatenate((goal_space_high, [1, 1]))

        if self.vel_in_state:
            # x and y vel
            obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
            obs_space_high = np.concatenate((obs_space_high, [10, 10]))
            goal_space_low = np.concatenate((goal_space_low, [0, 0]))
            goal_space_high = np.concatenate((goal_space_high, [0, 0]))

            # z vel
            if self.z_in_state:
                obs_space_low = np.concatenate((obs_space_low, [-10]))
                obs_space_high = np.concatenate((obs_space_high, [10]))
                goal_space_low = np.concatenate((goal_space_low, [0]))
                goal_space_high = np.concatenate((goal_space_high, [0]))

            # theta vel
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low,
                              goal_space_high,
                              dtype=np.float32)
        if self.two_frames:
            self.obs_space = concatenate_box_spaces(self.obs_space,
                                                    self.obs_space)
            self.goal_space = concatenate_box_spaces(self.goal_space,
                                                     self.goal_space)

        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.obs_space),
            ('proprio_desired_goal', self.goal_space),
            ('proprio_achieved_goal', self.obs_space),
        ])

        self._state_goal = None
        self._cur_obs = None
        self._prev_obs = None
        self.reset()
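
The flags z_in_state, vel_in_state, and two_frames determine how large the observation built above is. A small sketch that just re-derives the resulting state dimensionality from the concatenations in this constructor:

def wheeled_car_state_dim(vel_in_state=True, z_in_state=True, two_frames=False):
    dim = 2                      # x, y position
    if z_in_state:
        dim += 1                 # z position
    dim += 2                     # sin(theta), cos(theta)
    if vel_in_state:
        dim += 2                 # x, y velocity
        if z_in_state:
            dim += 1             # z velocity
        dim += 1                 # theta velocity
    return dim * (2 if two_frames else 1)

print(wheeled_car_state_dim())                  # 9 with the defaults above
print(wheeled_car_state_dim(two_frames=True))   # 18 when two frames are stacked
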
Example #9
    def __init__(
            self,
            # Room
            room,
            # Start and Goal
            start_config="all",
            goal_config="all",
            # Reward
            potential_type='',
            shaped=False,
            base_reward='',
            # State and Goal Representations
            state_embedding=None,
            goal_embedding=None,
            max_path_length=None,
            *args,
            **kwargs):

        # Initialize
        Serializable.quick_init(self, locals())
        MultitaskEnv.__init__(self)

        # Environment Configuration
        self._room = room
        model = self.room.get_mjcmodel()
        self.possible_positions = self.room.XY(n=50)

        with model.asfile() as f:
            MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

        # Initialization
        self.start_config = start_config
        self.baseline_start = self.room.get_start()
        self.start = np.zeros_like(self.baseline_start)

        self.goal_config = goal_config
        self.baseline_goal = self.room.get_target()
        self.goal = np.zeros_like(self.baseline_goal)

        # Time Limit
        self.curr_path_length = 0
        if max_path_length is None:
            self.max_path_length = self.MAX_PATH_LENGTH
        else:
            self.max_path_length = max_path_length

        # Reward Functions
        self.potential_type = potential_type
        self.shaped = shaped
        self.base_reward = base_reward

        # State Representations
        self.state_embedding = (state_embedding if state_embedding is not None
                                else IdentityEmbedding())
        self.goal_embedding = (goal_embedding if goal_embedding is not None
                               else IdentityEmbedding())

        # Action Space
        bounds = self.model.actuator_ctrlrange.copy()
        self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])

        # Observation Space
        example_state_obs = self._get_env_obs()
        example_obs = self.state_embedding.get_embedding(example_state_obs)
        state_obs_size = example_state_obs.shape[0]
        obs_size = example_obs.shape[0]
        self.obs_space = Box(-1 * np.inf * np.ones(obs_size),
                             np.inf * np.ones(obs_size))
        self.state_obs_space = Box(-1 * np.inf * np.ones(state_obs_size),
                                   np.inf * np.ones(state_obs_size))

        # Goal Space
        example_goal = self._get_env_achieved_goal(example_state_obs)
        state_goal_size = example_goal.shape[0]
        goal_size = self.goal_embedding.get_embedding(example_goal).shape[0]
        self.goal_space = Box(-1 * np.inf * np.ones(goal_size),
                              np.inf * np.ones(goal_size))
        self.state_goal_space = Box(-1 * np.inf * np.ones(state_goal_size),
                                    np.inf * np.ones(state_goal_size))

        # Final Setup
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_obs_space),
            ('state_desired_goal', self.state_goal_space),
            ('state_achieved_goal', self.state_goal_space),
        ])

        self.reset()
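
The constructor above only relies on state_embedding and goal_embedding exposing a get_embedding method, with IdentityEmbedding as the default. A minimal stand-in consistent with that usage (the real IdentityEmbedding class may differ):

import numpy as np

class IdentityEmbedding:
    # Pass the state/goal through unchanged, matching the
    # get_embedding(...) calls in the constructor above.
    def get_embedding(self, x):
        return np.asarray(x)
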
Example #10
    def __init__(
            self,
            obj_low=(-0.1, 0.35, 0.05),
            obj_high=(0.1, 0.55, 0.05),
            reward_type='dense',
            indicator_threshold=0.06,
            random_init=True,

            # hand_low=(-0.5, 0.40, 0.05),
            # hand_high=(0.5, 1, 0.5),
            hand_low=(-0.7, 0.30, 0.05),
            hand_high=(0.7, 1, 0.5),
            goal_low=None,
            goal_high=None,
            reset_free=False,
            num_beads=7,
            init_pos=(0., 0.4, 0.02),
            substeps=50,
            log_video=False,
            video_substeps=5,
            video_h=500,
            video_w=500,
            camera_name='leftcam',
            sparse=False,
            action_penalty_const=1e-2,
            rotMode='fixed',
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        model = rope(num_beads=num_beads, init_pos=init_pos, texture=True)
        import os
        model.save(os.path.expanduser('~/sawyer_rope.xml'))
        with model.asfile() as f:
            SawyerXYZEnv.__init__(self,
                                  model_name=f.name,
                                  frame_skip=5,
                                  **kwargs)
        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        self.reset_free = reset_free

        #sim params
        self.num_beads = num_beads
        self.init_pos = init_pos
        self.substeps = substeps  # number of intermediate positions to generate

        #reward params
        self.sparse = sparse
        self.action_penalty_const = action_penalty_const

        #video params
        self.log_video = log_video
        self.video_substeps = video_substeps
        self.video_h = video_h
        self.video_w = video_w
        self.camera_name = camera_name

        self._state_goal = np.asarray([1.0, 0.0])
        self.rotMode = rotMode
        self.random_init = random_init
        if rotMode == 'fixed':
            self.action_space = Box(
                np.array([-1, -1, -1, -1]),
                np.array([1, 1, 1, 1]),
            )
        elif rotMode == 'rotz':
            self.action_rot_scale = 1. / 10
            self.action_space = Box(
                np.array([-1, -1, -1, -1, -1]),
                np.array([1, 1, 1, 1, 1]),
            )
        elif rotMode == 'quat':
            self.action_space = Box(
                np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
                np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
            )
        else:
            self.action_space = Box(
                np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
                np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
            )

        self.obj_and_goal_space = Box(np.array(obj_low),
                                      np.array(obj_high),
                                      dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)

        self.observation_space = Box(
            np.hstack((self.hand_low, list(obj_low) * self.num_beads)),
            np.hstack((self.hand_high, list(obj_high) * self.num_beads)),
            dtype=np.float32)

        self.hand_reset_pos = np.array([0, .6, .2])
        self.reset()
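
rotMode selects which action space the constructor above builds. The resulting action dimensionalities, read directly off the Box bounds (how each component is interpreted lives in the class's step method, which is not shown here):

ROTMODE_ACTION_DIM = {
    'fixed': 4,   # commonly xyz delta + gripper (the default)
    'rotz': 5,    # adds a z rotation component
    'quat': 8,    # per the 8-dim Box above
}
# Any other rotMode value falls through to the 7-dim Box in the else branch.
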
Example #11
    def __init__(
            self,

            block_low=(-0.2, 0.55, 0.02),
            block_high=(0.2, 0.75, 0.02),

            hand_low=(0.0, 0.55, 0.3),
            hand_high=(0.0, 0.55, 0.3),

            stack_goal_low=(-0.2, 0.55, 0.02),
            stack_goal_high=(0.2, 0.75, 0.02),

            fix_goal=False,
            fixed_stack_goal=(0.0, 0.55, 0.02),

            use_sparse_reward=False,
            sparse_reward_threshold=0.05,

            reset_free=False,
            hide_goal_markers=False,
            oracle_reset_prob=0.0,

            xml_path='sawyer_xyz/two_blocks.xml',

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            model_name=get_asset_full_path(xml_path),
            **kwargs)

        self.block_low = np.array(block_low)
        self.block_high = np.array(block_high)
        self.block_radius = 0.02

        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        self.stack_goal_low = np.array(stack_goal_low)
        self.stack_goal_high = np.array(stack_goal_high)

        self.max_place_distance = max(
            np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
            np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
        )

        self.fix_goal = fix_goal
        self.fixed_stack_goal = np.array(fixed_stack_goal)

        self.use_sparse_reward = use_sparse_reward
        self.sparse_reward_threshold = sparse_reward_threshold

        self.reset_free = reset_free
        self.hide_goal_markers = hide_goal_markers
        self.oracle_reset_prob = oracle_reset_prob

        self.action_space = Box(
            np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)

        midpoint = (self.block_low[0] + self.block_high[0]) / 2.0
        block_one_high = [midpoint, *self.block_high[1:]]
        block_two_low = [midpoint, *self.block_low[1:]]

        self.sampling_space = Box(
            np.hstack(([0.0], self.hand_low, self.block_low, block_two_low)),
            np.hstack(([0.01], self.hand_high, block_one_high, self.block_high)),
            dtype=np.float32)

        self.gripper_and_hand_and_blocks_space = Box(
            np.hstack(([0.0], self.hand_low, self.block_low, self.block_low)),
            np.hstack(([0.01], self.hand_high, self.block_high, self.block_high)),
            dtype=np.float32)

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_blocks_space),
            ('desired_goal', self.gripper_and_hand_and_blocks_space),
            ('achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('state_observation', self.gripper_and_hand_and_blocks_space),
            ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_observation', self.gripper_and_hand_and_blocks_space),
            ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ])

        self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
        self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
        self.reset()
Example #12
    def __init__(self,
                 goal_low=(-0.1, 0.45, 0.15, 0),
                 goal_high=(0.0, 0.65, .225, 1.0472),
                 action_reward_scale=0,
                 reward_type='angle_difference',
                 indicator_threshold=(.02, .03),
                 fix_goal=False,
                 fixed_goal=(0, .45, .12, -.25),
                 reset_free=False,
                 fixed_hand_z=0.12,
                 hand_low=(-0.1, 0.45, 0.15),
                 hand_high=(0., 0.65, .225),
                 target_pos_scale=1,
                 target_angle_scale=1,
                 min_angle=0,
                 max_angle=1.0472,
                 xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
                 **sawyer_xyz_kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(self,
                              self.model_name,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              **sawyer_xyz_kwargs)
        MultitaskEnv.__init__(self)
        # self.initialize_camera(camera)
        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low),
                              np.array(goal_high),
                              dtype=np.float32)
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
            dtype=np.float32,
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.state_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        #ensure env does not start in weird positions
        self.reset_free = True
        self.reset()
        self.reset_free = reset_free