def __init__(self,
             action_scale=1,
             frame_skip=5,
             reward_type='vel_distance',
             indicator_threshold=.1,
             fixed_goal=5,
             fix_goal=False,
             max_speed=6):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fixed_goal = fixed_goal
    self.fix_goal = fix_goal
    self._state_goal = None
    self.goal_space = Box(np.array(-1 * max_speed), np.array(max_speed))
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(self.obs_space.low[8], self.obs_space.high[8])
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.reset()
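# Illustrative usage of the dict-observation API set up above (the class
# name below is hypothetical; the constructor belongs to a velocity-goal
# locomotion env in this module):
#
#     env = VelocityGoalEnv(fix_goal=True, fixed_goal=5)
#     obs = env.reset()
#     obs['observation']    # full state vector (unbounded Box)
#     obs['desired_goal']   # target velocity in [-max_speed, max_speed]
#     obs['achieved_goal']  # the scalar observation dimension at index 8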
def __init__(self,
             reward_type='hand_distance',
             norm_order=1,
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.3),
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_space),
        ('desired_goal', self.hand_space),
        ('achieved_goal', self.hand_space),
        ('state_observation', self.hand_space),
        ('state_desired_goal', self.hand_space),
        ('state_achieved_goal', self.hand_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
def __init__(self,
             action_scale=1,
             frame_skip=5,
             force=None,
             timestep_start=100000,
             timestep_end=100000):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.force = force
    self.timestep_start = timestep_start
    self.timestep_end = timestep_end
    self.step_count = 0
    self.reset()
def __init__(self,
             action_scale=1,
             frame_skip=5,
             obstacle_position=None,
             obstacle_height=None):
    self.quick_init(locals())
    self.obstacle_position = obstacle_position
    self.obstacle_height = obstacle_height
    print(self.obstacle_position, self.obstacle_height)
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()
def __init__(self,
             frame_skip=30,
             action_scale=10,
             keep_vel_in_obs=True,
             use_safety_box=False,
             fix_goal=False,
             fixed_goal=(0.05, 0.6, 0.15),
             reward_type='hand_distance',
             indicator_threshold=.05,
             goal_low=None,
             goal_high=None,
             ):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    if goal_low is None:
        goal_low = np.array([-0.1, 0.5, 0.02])
    else:
        goal_low = np.array(goal_low)
    if goal_high is None:
        goal_high = np.array([0.1, 0.7, 0.2])
    else:
        # fixed: was np.array(goal_low), which silently ignored a
        # user-provided goal_high
        goal_high = np.array(goal_high)
    self.safety_box = Box(goal_low, goal_high)
    self.keep_vel_in_obs = keep_vel_in_obs
    self.goal_space = Box(goal_low, goal_high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(-np.inf * np.ones(3), np.inf * np.ones(3))
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.use_safety_box = use_safety_box
    self.prev_qpos = self.init_angles.copy()
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    goal = self.sample_goal()
    self._state_goal = goal['state_desired_goal']
    self.reset()
def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.state_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    self.reset()
def __init__(self,
             obj_low=None,
             obj_high=None,
             reward_type='hand_and_obj_distance',
             indicator_threshold=0.06,
             obj_init_pos=(0, 0.65, 0.03),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             hide_arm=False,
             num_objects=1,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.hide_arm = hide_arm
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.num_objects = num_objects
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, np.tile(obj_low, num_objects)))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, np.tile(obj_high, num_objects)))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
        np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(
        np.array(goal_low),
        np.array(goal_high),
    )
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]))
    max_angle = 1.5708
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.state_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.state_space),
        ('state_achieved_goal', self.state_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.target_pos_scale = target_pos_scale
    self.action_reward_scale = action_reward_scale
    self.target_angle_scale = target_angle_scale
    self.max_x_pos = max_x_pos
    self.max_y_pos = max_y_pos
    self.min_y_pos = .5
    self.reset()
    self.reset_mocap_welds()
def __init__(self,
             puck_low=None,
             puck_high=None,
             reward_type='hand_and_puck_distance',
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    goal_low = np.array(goal_low)
    goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = self.get_puck_pos()[2]
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.6, 0.02),
        fix_goal=True,
        fixed_goal=(0, 0.85, 0.12, 0.1),  # 3D placing goal, followed by height target for picking
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.max_path_length = 200
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
    self.reset()
def step(self, action):
    self._act(action)
    observation = self._get_obs()
    reward = MultitaskEnv.compute_reward(self, action, observation)
    info = self._get_info()
    done = False
    return observation, reward, done, info
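# Note that the reward is computed from the (action, observation-dict)
# pair rather than from private simulator state. This is what makes goal
# relabeling (e.g. HER) possible: a stored transition can be re-scored
# under a different goal. Hedged sketch, assuming `env` is a MultitaskEnv
# subclass and `new_goal` lies in the 'desired_goal' space:
#
#     relabeled = dict(observation)
#     relabeled['desired_goal'] = new_goal
#     new_reward = env.compute_reward(action, relabeled)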
def __init__(
        self,
        frame_skip=50,
        goal_low=-.5,
        goal_high=.5,
        pos_action_scale=1 / 100,
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=.25,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array([fixed_goal])
    self.goal_space = Box(np.array([goal_low]), np.array([goal_high]))
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]))
    max_angle = 1.5708
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.angle_space = Box(np.array([-max_angle]), np.array([max_angle]))
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.angle_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.angle_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.action_reward_scale = action_reward_scale
    self.reset()
    self.reset_mocap_welds()
def __init__(self, action_scale=1, frame_skip=5):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low=low, high=high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=1000,
        p_obj_in_hand=.75,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()
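# The branch above defers goal presampling: when no goals are passed in,
# the first call to sample_goal builds `num_goals_presampled` goals once,
# and later samples index into that cache. Hedged sketch of the consumer
# side (the exact helper is an assumption, not this class's API):
#
#     idx = np.random.randint(env.num_goals_presampled)
#     goal = {k: v[idx] for k, v in env._presampled_goals.items()}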
def __init__(self,
             puck_low=(-.4, .2),
             puck_high=(.4, 1),
             reward_type='state_distance',
             norm_order=1,
             indicator_threshold=0.06,
             hand_low=(-0.28, 0.3, 0.05),
             hand_high=(0.28, 0.9, 0.3),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
             goal_low=(-0.25, 0.3, 0.02, -.2, .4),
             goal_high=(0.25, 0.875, 0.02, .2, .8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             init_hand_xyz=(0, 0.4, 0.07),
             reset_free=False,
             xml_path='sawyer_xyz/sawyer_push_puck.xml',
             clamp_puck_on_step=False,
             puck_radius=.07,
             **kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                   np.hstack((self.hand_high, puck_high)),
                                   dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.init_hand_xyz = np.array(init_hand_xyz)
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(self,
             goal_low=(0,),
             goal_high=(1.0472,),
             action_reward_scale=0,
             reward_type='angle_difference',
             indicator_threshold=(.02, .03),
             fix_goal=False,
             fixed_goal=(-.25),  # note: parenthesized scalar, not a 1-tuple
             reset_free=False,
             fixed_hand_z=0.12,
             hand_low=(-0.1, 0.45, 0.15),
             hand_high=(0., 0.65, .225),
             target_pos_scale=1,
             target_angle_scale=1,
             min_angle=0,
             max_angle=1.0472,
             xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
             **sawyer_xyz_kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(self,
                          self.model_name,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          **sawyer_xyz_kwargs)
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high), dtype=np.float32)
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    # Ensure the env does not start in a weird position: reset once with
    # reset_free enabled, then restore the requested setting.
    self.reset_free = True
    self.reset()
    self.reset_free = reset_free
def __init__(
        self,
        puck_low=(-.35, .25),
        puck_high=(.35, .95),
        norm_order=1,
        indicator_threshold=0.06,
        touch_threshold=0.1,  # chosen empirically after a few runs, by inspecting a histogram
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=True,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8),
        init_puck_z=0.035,
        init_hand_xyz=(0, 0.4, 0.07),
        reset_free=False,
        xml_path='sawyer_xyz/sawyer_push_puck.xml',
        clamp_puck_on_step=False,
        puck_radius=.07,
        **kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    # Several settings may be overridden through kwargs; they are popped
    # here so the remainder can be forwarded to SawyerXYZEnv.
    self.goal_type = kwargs.pop('goal_type', 'puck')
    self.dense_reward = kwargs.pop('dense_reward', False)
    self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
    self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
    self.task_agnostic = kwargs.pop('task_agnostic', False)
    self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
    self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    self.puck_low = np.array(puck_low)
    self.puck_high = np.array(puck_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.norm_order = norm_order
    self.touch_threshold = touch_threshold
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = self.task_agnostic
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
        dtype=np.float32
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(self,
             reward_type='dense',
             count_bonus_coeff=None,
             n_bins=16,
             target_radius=0.5,  # required distance to goal when using sparse reward
             norm_order=2,
             frame_skip=5,
             two_frames=False,
             vel_in_state=True,
             ant_low=list([-6, -6]),
             ant_high=list([6, 6]),
             goal_low=list([-6, -6]),
             goal_high=list([6, 6]),
             model_path='classic_mujoco/normal_gear_ratio_ant.xml',
             use_low_gear_ratio=False,
             goal_is_xy=False,
             goal_is_qpos=False,
             init_qpos=None,
             fixed_goal=None,
             diagnostics_goal=None,
             init_xy_mode='corner',
             terminate_when_unhealthy=False,
             healthy_z_range=(0.2, 0.9),
             done_penalty=0,
             goal_sampling_strategy='uniform',
             presampled_goal_paths='',
             *args,
             **kwargs):
    assert init_xy_mode in {
        'corner',
        'sample-uniformly-xy-space',
        'sample-from-goal-space',  # soon to be deprecated
    }
    assert not goal_is_xy or not goal_is_qpos
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    if use_low_gear_ratio:
        model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path(model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    if goal_is_xy:
        assert reward_type.startswith('xy')
    if init_qpos is not None:
        self.init_qpos[:len(init_qpos)] = np.array(init_qpos)
    self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
    self.reward_type = reward_type
    self.count_bonus_coeff = count_bonus_coeff
    self.n_bins = n_bins
    self.bin_counts = np.ones((self.n_bins, self.n_bins))
    self.target_radius = target_radius
    self.norm_order = norm_order
    self.goal_is_xy = goal_is_xy
    self.goal_is_qpos = goal_is_qpos
    self.fixed_goal = fixed_goal
    self.diagnostics_goal = diagnostics_goal
    print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
    self.init_xy_mode = init_xy_mode
    self.terminate_when_unhealthy = terminate_when_unhealthy
    self._done_penalty = done_penalty
    self._healthy_z_range = healthy_z_range
    self.model_path = model_path
    assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
    self.goal_sampling_strategy = goal_sampling_strategy
    if self.goal_sampling_strategy == 'presampled':
        assert presampled_goal_paths is not None
        if not osp.exists(presampled_goal_paths):
            presampled_goal_paths = get_asset_full_path(presampled_goal_paths)
        self.presampled_goals = np.load(presampled_goal_paths)
    else:
        self.presampled_goals = None
    self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
    goal_low, goal_high = np.array(goal_low), np.array(goal_high)
    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    # With velocities in the state, the non-xy part of the state has 27
    # dimensions; without them, 13.
    if self.vel_in_state:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(27)))
            goal_space_high = np.concatenate((goal_high, np.ones(27)))
    else:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(13)))
            goal_space_high = np.concatenate((goal_high, np.ones(13)))
    if self.two_frames:
        self.obs_space = Box(np.concatenate((obs_space_low, obs_space_low)),
                             np.concatenate((obs_space_high, obs_space_high)),
                             dtype=np.float32)
        self.goal_space = Box(np.concatenate((goal_space_low, goal_space_low)),
                              np.concatenate((goal_space_high, goal_space_high)),
                              dtype=np.float32)
    else:
        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))
    spaces = [
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ]
    if self.goal_is_xy:
        self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
        spaces += [
            ('xy_observation', self.xy_obs_space),
            ('xy_desired_goal', self.xy_obs_space),
            ('xy_achieved_goal', self.xy_obs_space),
        ]
    if self.goal_is_qpos:
        spaces += [
            ('qpos_desired_goal', qpos_space),
            ('qpos_achieved_goal', qpos_space),
        ]
    self.observation_space = Dict(spaces)
    self._full_state_goal = None
    self._xy_goal = None
    self._qpos_goal = None
    self._prev_obs = None
    self._cur_obs = None
def __init__(self,
             block_low=(-0.2, 0.55, 0.02),
             block_high=(0.2, 0.75, 0.02),
             hand_low=(0.0, 0.55, 0.3),
             hand_high=(0.0, 0.55, 0.3),
             stack_goal_low=(-0.2, 0.55, 0.02),
             stack_goal_high=(0.2, 0.75, 0.02),
             fix_goal=False,
             fixed_stack_goal=(0.0, 0.55, 0.02),
             fixed_hand_goal=(0.0, 0.75, 0.3),
             use_sparse_reward=False,
             sparse_reward_threshold=0.05,
             reset_free=False,
             hide_goal_markers=False,
             oracle_reset_prob=0.0,
             xml_path='sawyer_xyz/three_blocks.xml',
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=get_asset_full_path(xml_path), **kwargs)
    self.block_low = np.array(block_low)
    self.block_high = np.array(block_high)
    self.block_radius = 0.02
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    self.stack_goal_low = np.array(stack_goal_low)
    self.stack_goal_high = np.array(stack_goal_high)
    self.max_place_distance = max(
        np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
        np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
    )
    self.fix_goal = fix_goal
    self.fixed_stack_goal = np.array(fixed_stack_goal)
    self.use_sparse_reward = use_sparse_reward
    self.sparse_reward_threshold = sparse_reward_threshold
    self.reset_free = reset_free
    self.hide_goal_markers = hide_goal_markers
    self.oracle_reset_prob = oracle_reset_prob
    self.action_space = Box(np.array([-1, -1, -1, -1]),
                            np.array([1, 1, 1, 1]),
                            dtype=np.float32)
    # Split the block x-range into three equal bins, one per block, so
    # sampled resets place the blocks left-to-right without overlap.
    diff = (self.block_high[0] - self.block_low[0]) / 3.0
    midpoint = diff + self.block_low[0]
    block_one_high = [midpoint, *self.block_high[1:]]
    block_two_low = [midpoint, *self.block_low[1:]]
    midpoint = midpoint + diff
    block_two_high = [midpoint, *self.block_high[1:]]
    block_three_low = [midpoint, *self.block_low[1:]]
    self.sampling_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, block_two_low, block_three_low)),
        np.hstack(([0.04], self.hand_high, block_one_high, block_two_high, self.block_high)),
        dtype=np.float32)
    self.gripper_and_hand_and_blocks_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, self.block_low, self.block_low)),
        np.hstack(([0.04], self.hand_high, self.block_high, self.block_high, self.block_high)),
        dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_blocks_space),
        ('desired_goal', self.gripper_and_hand_and_blocks_space),
        ('achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('state_observation', self.gripper_and_hand_and_blocks_space),
        ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_observation', self.gripper_and_hand_and_blocks_space),
        ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
    ])
    self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
    self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
    self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=None,
        goal_low=None,
        goal_high=None,
        hard_goals=False,
        hide_goal_markers=True,
        presampled_goals=None,
        num_goals_presampled=1000,
        norm_order=2,
        structure='2d',
        two_obj=False,
        frame_skip=100,
        reset_p=None,
        goal_p=None,
        fixed_reset=None,
        hide_state_markers=True,
        test_mode_case_num=None,
        **kwargs
):
    self.quick_init(locals())
    # self.obj_id[0] for the first object, self.obj_id[1] for the second object.
    self.obj_id = {
        0: '',
        1: '2',
    }
    assert structure in ['2d', '3d', '2d_wall_short', '2d_wall_tall', '2d_wall_tall_dark']
    self.structure = structure
    self.two_obj = two_obj
    self.hard_goals = hard_goals
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        frame_skip=frame_skip,
        **kwargs
    )
    self.norm_order = norm_order
    if obj_low is None:
        obj_low = np.copy(self.hand_low)
        obj_low[2] = 0.0
    if obj_high is None:
        obj_high = np.copy(self.hand_high)
    self.obj_low = np.array(obj_low)
    self.obj_high = np.array(obj_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.subgoals = None
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.hide_state_markers = hide_state_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    if self.two_obj:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low)),
            np.hstack((self.hand_high, obj_high, obj_high)),
            dtype=np.float32
        )
    else:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    if self.two_obj:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, obj_high, [0.07])),
            dtype=np.float32
        )
    else:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, [0.07])),
            dtype=np.float32
        )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.gripper_and_hand_and_obj_space),
        ('achieved_goal', self.gripper_and_hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.gripper_and_hand_and_obj_space),
        ('state_achieved_goal', self.gripper_and_hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # if num_goals_presampled > 0, presampled_goals will be created
        # when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.obj_radius = np.array([0.018, 0.016])  # was 0.020
    self.ee_radius = np.array([0.053, 0.058])  # was 0.065
    if self.structure == '2d_wall_short':
        self.wall_radius = np.array([0.03, 0.02])
        self.wall_center = np.array([0.0, 0.60, 0.02])
    elif self.structure in ['2d_wall_tall', '2d_wall_tall_dark']:
        self.wall_radius = np.array([0.015, 0.04])
        self.wall_center = np.array([0.0, 0.60, 0.04])
    else:
        self.wall_radius = None
        self.wall_center = None
    if fixed_reset is not None:
        fixed_reset = np.array(fixed_reset)
    self.fixed_reset = fixed_reset
    if fixed_goal is not None:
        fixed_goal = np.array(fixed_goal)
    self.fixed_goal = fixed_goal
    self.reset_p = reset_p
    self.goal_p = goal_p
    self.test_mode_case_num = test_mode_case_num
    if self.test_mode_case_num == 1:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.50, 0.10, 0.0, 0.70, 0.03, 0.0, 0.70, 0.015])
    elif self.test_mode_case_num == 2:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.70, 0.10, 0.0, 0.50, 0.015, 0.0, 0.50, 0.03])
    elif self.test_mode_case_num == 3:
        self.fixed_reset = np.array([0.0, 0.60, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.60, 0.05, 0.0, 0.70, 0.015, 0.0, 0.50, 0.015])
    if presampled_goals is not None:
        self.reset()
    else:
        # reset once with a single presampled goal so that reset() does not
        # trigger the full (expensive) goal presampling during construction
        self.num_goals_presampled = 1
        self.reset()
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
def __init__(self,
             # Room
             room,
             # Start and Goal
             start_config="all",
             goal_config="all",
             # Reward
             potential_type='',
             shaped=False,
             base_reward='',
             # State and Goal Representations
             use_state_images=False,
             use_goal_images=False,
             image_resolution=64,
             # Time Limits
             max_path_length=None,
             *args,
             **kwargs
             ):
    # Initialize
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(self)

    # Environment Configuration
    self._room = room
    model = self.room.get_mjcmodel()
    self.possible_positions = self.room.XY(n=50)
    with model.asfile() as f:
        MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

    # Initialization
    self.start_config = start_config
    self.baseline_start = self.room.get_start()
    self.start = np.zeros_like(self.baseline_start)
    self.goal_config = goal_config
    self.baseline_goal = self.room.get_target()
    self.goal = np.zeros_like(self.baseline_goal)

    # Time Limit
    self.curr_path_length = 0
    if max_path_length is None:
        self.max_path_length = self.MAX_PATH_LENGTH
    else:
        self.max_path_length = max_path_length

    # Reward Functions
    self.potential_type = potential_type
    self.shaped = shaped
    self.base_reward = base_reward

    # Action Space
    bounds = self.model.actuator_ctrlrange.copy()
    self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])
    self.use_state_images = use_state_images
    self.use_goal_images = use_goal_images
    self.image_resolution = image_resolution

    # Observation Space
    example_state_obs = self._get_env_obs()
    if self.use_state_images:
        example_obs = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
    else:
        example_obs = example_state_obs
    # fixed: was example_obs.shape, which made the state space image-shaped
    # whenever use_state_images=True; this mirrors the goal-space logic below
    state_obs_shape = example_state_obs.shape
    obs_shape = example_obs.shape
    self.obs_space = Box(-1 * np.ones(obs_shape), np.ones(obs_shape))
    self.state_obs_space = Box(-1 * np.ones(state_obs_shape), np.ones(state_obs_shape))

    # Goal Space
    example_state_goal = self._get_env_achieved_goal(example_state_obs)
    if self.use_goal_images:
        example_goal = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
    else:
        example_goal = example_state_goal
    state_goal_shape = example_state_goal.shape
    goal_shape = example_goal.shape
    self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
    self.state_goal_space = Box(-1 * np.ones(state_goal_shape), np.ones(state_goal_shape))

    # Final Setup
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_obs_space),
        ('state_desired_goal', self.state_goal_space),
        ('state_achieved_goal', self.state_goal_space),
    ])
    self.reset()
def __init__(self,
             reward_type='dense',
             norm_order=2,
             action_scale=0.15,
             frame_skip=100,
             ball_low=(-4.0, -4.0),
             ball_high=(4.0, 4.0),
             goal_low=(-4.0, -4.0),
             goal_high=(4.0, 4.0),
             model_path='pointmass_u_wall_big.xml',
             reset_low=None,
             reset_high=None,
             v_func_heatmap_bounds=(-2.0, 0.0),
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('pointmass/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32)
    self.action_scale = action_scale
    self.ball_radius = 0.50
    self.walls = [
        Wall(0, 1.0, 2.5, 0.5, self.ball_radius),
        Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius),
        Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius),
    ]
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(ball_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(ball_high)
    else:
        self.reset_high = np.array(reset_high)
    obs_space_low = np.copy(self.ball_low)
    obs_space_high = np.copy(self.ball_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)
    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self.v_func_heatmap_bounds = v_func_heatmap_bounds
    self._state_goal = None
    self.reset()
def __init__(self,
             reward_type='state_distance',
             norm_order=2,
             action_scale=20,
             frame_skip=3,
             two_frames=False,
             vel_in_state=True,
             z_in_state=True,
             car_low=(-1.90, -1.90),
             car_high=(1.90, 1.90),
             goal_low=(-1.90, -1.90),
             goal_high=(1.90, 1.90),
             model_path='wheeled_car.xml',
             reset_low=None,
             reset_high=None,
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('locomotion/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32)
    self.action_scale = action_scale

    # set radius
    if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
        self.car_radius = np.sqrt(2) * 0.34
    elif model_path == 'wheeled_car_old.xml':
        self.car_radius = 0.30
    else:
        raise NotImplementedError

    # set walls
    if model_path == 'wheeled_car_u_wall.xml':
        self.walls = [
            Wall(0, -0.5, 0.75, 0.05, self.car_radius),
            Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
            Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
        ]
    else:
        self.walls = []

    self.reward_type = reward_type
    self.norm_order = norm_order
    self.car_low, self.car_high = np.array(car_low), np.array(car_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(car_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(car_high)
    else:
        self.reset_high = np.array(reset_high)
    # shrink all position bounds by the car radius so sampled positions
    # keep the whole car inside the arena
    self.car_low += self.car_radius
    self.car_high -= self.car_radius
    self.goal_low += self.car_radius
    self.goal_high -= self.car_radius
    self.reset_low += self.car_radius
    self.reset_high -= self.car_radius

    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    self.z_in_state = z_in_state

    # x and y pos
    obs_space_low = np.copy(self.car_low)
    obs_space_high = np.copy(self.car_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)

    # z pos
    if self.z_in_state:
        obs_space_low = np.concatenate((obs_space_low, [-1]))
        obs_space_high = np.concatenate((obs_space_high, [0.03]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    # sin and cos
    obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
    obs_space_high = np.concatenate((obs_space_high, [1, 1]))
    goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
    goal_space_high = np.concatenate((goal_space_high, [1, 1]))

    if self.vel_in_state:
        # x and y vel
        obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
        obs_space_high = np.concatenate((obs_space_high, [10, 10]))
        goal_space_low = np.concatenate((goal_space_low, [0, 0]))
        goal_space_high = np.concatenate((goal_space_high, [0, 0]))
        # z vel
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))
        # theta vel
        obs_space_low = np.concatenate((obs_space_low, [-10]))
        obs_space_high = np.concatenate((obs_space_high, [10]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    if self.two_frames:
        self.obs_space = concatenate_box_spaces(self.obs_space, self.obs_space)
        self.goal_space = concatenate_box_spaces(self.goal_space, self.goal_space)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self._state_goal = None
    self._cur_obs = None
    self._prev_obs = None
    self.reset()
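# `two_frames` stacks two consecutive frames, so both spaces are doubled.
# `concatenate_box_spaces` is assumed to concatenate the bounds of two Box
# spaces; a minimal sketch of such a helper (an assumption, not necessarily
# the library's actual implementation):
#
#     def concatenate_box_spaces(space1, space2):
#         return Box(np.concatenate((space1.low, space2.low)),
#                    np.concatenate((space1.high, space2.high)),
#                    dtype=np.float32)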
def __init__(self,
             puck_low=(-.4, .2),
             puck_high=(.4, 1),
             reward_type='state_distance',
             norm_order=1,
             indicator_threshold=0.06,
             hand_low=(-0.28, 0.3, 0.05),
             hand_high=(0.28, 0.9, 0.3),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
             goal_low=(-0.25, 0.3, 0.02, -.2, .4, -.2, .4),
             goal_high=(0.25, 0.875, 0.02, .2, .8, .2, .8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             num_resets_before_puck_reset=1,
             num_resets_before_hand_reset=1,
             always_start_on_same_side=True,
             goal_always_on_same_side=True,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_two_puck_space = Box(
        np.hstack((self.hand_low, puck_low, puck_low)),
        np.hstack((self.hand_high, puck_high, puck_high)),
        dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_two_puck_space),
        ('desired_goal', self.hand_and_two_puck_space),
        ('achieved_goal', self.hand_and_two_puck_space),
        ('state_observation', self.hand_and_two_puck_space),
        ('state_desired_goal', self.hand_and_two_puck_space),
        ('state_achieved_goal', self.hand_and_two_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_counter = 0
    self.num_resets_before_puck_reset = num_resets_before_puck_reset
    self.num_resets_before_hand_reset = num_resets_before_hand_reset
    self._always_start_on_same_side = always_start_on_same_side
    self._goal_always_on_same_side = goal_always_on_same_side
    self._set_puck_xys(self._sample_puck_xys())
def __init__(self,
             reward_type='puck_distance',
             norm_order=2,
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.0, 0.6, 0.02, -0.15, 0.6),
             puck_low=None,
             puck_high=None,
             hand_low=(-0.25, 0.3, 0.02),
             hand_high=(0.25, .8, .02),
             goal_low=(-0.25, 0.3),
             goal_high=(0.25, 0.8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             reset_free=False,
             init_puck_pos=(0.0, .65),
             mode='train',
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.array(puck_low)
    if goal_high is None:
        goal_high = np.array(puck_high)
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.puck_space = Box(puck_low, puck_high)
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.puck_space),
        ('achieved_goal', self.puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.puck_space),
        ('state_achieved_goal', self.puck_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_free = reset_free
    self.puck_pos = self.get_puck_pos()[:2]
    self.mode(mode)
    self.init_puck_pos = np.array(init_puck_pos)
def __init__(
        self,
        obj_low=(-0.1, 0.35, 0.05),
        obj_high=(0.1, 0.55, 0.05),
        reward_type='dense',
        indicator_threshold=0.06,
        random_init=True,
        hand_low=(-0.7, 0.30, 0.05),
        hand_high=(0.7, 1, 0.5),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        num_beads=7,
        init_pos=(0., 0.4, 0.02),
        substeps=50,
        log_video=False,
        video_substeps=5,
        video_h=500,
        video_w=500,
        camera_name='leftcam',
        sparse=False,
        action_penalty_const=1e-2,
        rotMode='fixed',
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    # Generate the rope model XML; note the side effect of saving a copy
    # to the user's home directory.
    model = rope(num_beads=num_beads, init_pos=init_pos, texture=True)
    import os
    model.save(os.path.expanduser('~/sawyer_rope.xml'))
    with model.asfile() as f:
        SawyerXYZEnv.__init__(self, model_name=f.name, frame_skip=5, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    self.reset_free = reset_free

    # sim params
    self.num_beads = num_beads
    self.init_pos = init_pos
    self.substeps = substeps  # number of intermediate positions to generate

    # reward params
    self.sparse = sparse
    self.action_penalty_const = action_penalty_const

    # video params
    self.log_video = log_video
    self.video_substeps = video_substeps
    self.video_h = video_h
    self.video_w = video_w
    self.camera_name = camera_name

    self._state_goal = np.asarray([1.0, 0.0])
    self.rotMode = rotMode
    self.random_init = random_init
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1. / 10
        self.action_space = Box(
            np.array([-1, -1, -1, -1, -1]),
            np.array([1, 1, 1, 1, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
            np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
        )
    self.obj_and_goal_space = Box(np.array(obj_low), np.array(obj_high), dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Box(
        np.hstack((self.hand_low, list(obj_low) * self.num_beads)),
        np.hstack((self.hand_high, list(obj_high) * self.num_beads)),
        dtype=np.float32)
    self.hand_reset_pos = np.array([0, .6, .2])
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        hand_low=(-0.1, 0.55, 0.05),
        hand_high=(0.05, 0.65, 0.2),
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        frame_skip=50,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        num_objects=1,
        fix_goal=False,
        mocap_low=(-0.13, 0.57, 0.04),
        mocap_high=(0.08, 0.73, 0.2),
        action_scale=0.02,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        filename='sawyer_pick_and_place_multiobj.xml',
        object_mass=1,
        # e.g. object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl']
        object_meshes=None,
        obj_classname=None,
        block_height=0.04,
        block_width=0.04,
        cylinder_radius=0.015,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=2,
        p_obj_in_hand=1,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    base_filename = asset_base_path + filename
    friction_params = (1, 1, 2)
    self.obj_stat_prop = create_object_xml(base_filename, num_objects,
                                           object_mass, friction_params,
                                           object_meshes, finger_sensors,
                                           maxlen, minlen, preload_obj_dict,
                                           obj_classname, block_height,
                                           block_width, cylinder_radius)
    gen_xml = create_root_xml(base_filename)
    MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
    self.reset_mocap_welds()
    clean_xml(gen_xml)
    self.mocap_low = mocap_low
    self.mocap_high = mocap_high
    self.action_scale = action_scale
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.obj_ind_to_manip = 0
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.num_objects = num_objects
    self.maxlen = maxlen
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.num_goals_presampled = 10  # NOTE: unconditionally overrides the values set above
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()
def sample_goal(self):
    return MultitaskEnv.sample_goal(self)
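# Delegating to MultitaskEnv.sample_goal keeps the single-goal API
# consistent with the batch API: the base implementation typically draws
# sample_goals(1) and unpacks the first goal dict (hedged; exact behavior
# depends on the MultitaskEnv base class). Illustrative use:
#
#     goal = env.sample_goal()            # e.g. {'desired_goal': ..., 'state_desired_goal': ...}
#     goals = env.sample_goals(batch_size=8)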