Example #1
 def model_name(self):
     if self.puckAndRobotInvisible:
         return get_asset_full_path('sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_puck_and_robot_invisible.xml')
     if self.puckInvisible:
         return get_asset_full_path('sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_puck_invisible.xml')
     else:
         return get_asset_full_path('sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle.xml')
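Every example on this page funnels a relative asset path through get_asset_full_path. As a point of reference, here is a minimal sketch of such a helper, assuming the XML and .npy assets live in an assets/ directory next to the module (the directory layout and constant name are assumptions, not taken from the examples):

import os.path as osp

# Assumed layout: an assets/ directory sitting next to this module,
# containing subfolders such as sawyer_xyz/ and classic_mujoco/.
ENV_ASSET_DIR = osp.join(osp.dirname(__file__), 'assets')


def get_asset_full_path(file_name):
    # Resolve a relative path like 'sawyer_xyz/sawyer_pick_and_place.xml'
    # against the package-level asset directory.
    return osp.join(ENV_ASSET_DIR, file_name)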
Example #2
 def model_name(self):
     if self.hide_arm:
         print('hiding')
         return get_asset_full_path(
             'sawyer_xyz/sawyer_pick_and_place_hidden_arm.xml')
     print('not hiding')
     return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
Example #3
    def model_name(self):
        if self.obstacle_height == 'multiple':
            return get_asset_full_path('classic_mujoco/walker_obstacles/walker_multiple_obstacles.xml')

        return get_asset_full_path(
            'classic_mujoco/walker_obstacles/walker_obstacle_position={}_height={}.xml'.format(
                self.obstacle_position, self.obstacle_height))
Example #4
 def model_name(self):
     if self.ballInvisible:
         return get_asset_full_path(
             'sawyer_xyz/sawyer_pick_and_place_ball_invisible.xml')
     if self.ballAndRobotInvisible:
         return get_asset_full_path(
             'sawyer_xyz/sawyer_pick_and_place_ball_and_robot_invisible.xml'
         )
     else:
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
Example #5
    def model_name(self):
        # Remember to set the right limits in the base file (right line needs to be commented out)
        if self.hand_type == 'parallel_v1':
            self.reset_mocap_quat = [1, 0, 1, 0]
            return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')

        ############################# WSG GRIPPER #############################
        elif self.hand_type == 'weiss_v1':
            self.reset_mocap_quat = zangle_to_quat(np.pi/2) 
            #return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace_mug.xml')
            return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace.xml')
Example #6
 def model_name(self):
     if self.structure == '3d':
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_3d.xml')
     elif self.structure == '2d':
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d.xml')
     elif self.structure == '2d_wall_short':
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_short.xml')
     elif self.structure == '2d_wall_tall':
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_tall.xml')
     elif self.structure == '2d_wall_tall_dark':
         return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_tall_dark.xml')
Example #7
    def model_name(self):

        #return get_asset_full_path('sawyer_xyz/sawyer_pickPlace.xml')
        self.reset_mocap_quat = zangle_to_quat(
            np.pi / 2)  #this is the reset_mocap_quat for wsg grippers
        #return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace_mug.xml')
        return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace.xml')
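Examples #5, #7, and #12 set reset_mocap_quat via zangle_to_quat. A minimal sketch of that conversion, assuming it maps a rotation of zangle radians about the z-axis to a [w, x, y, z] quaternion (the return convention of the original helper is an assumption):

import numpy as np


def zangle_to_quat(zangle):
    # Rotation of `zangle` radians about the z-axis, as a [w, x, y, z] quaternion.
    return np.array([np.cos(zangle / 2), 0.0, 0.0, np.sin(zangle / 2)])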
Example #8
 def __init__(
     self,
     presampled_positions='classic_mujoco/ant_goal_qpos_5x5_xy.npy',
 ):
     self.quick_init(locals())
     super().__init__(
         include_contact_forces_in_state=False,
         xml_path='classic_mujoco/ant_full_state_goal.xml',
     )
     self.goal_space = Box(
         self.observation_space.low[:15],
         self.observation_space.high[:15],
     )
     self.observation_space = Dict([
         ('observation', self.observation_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.goal_space),
     ])
     self.camera_init = create_camera_init(
         lookat=(0, 0, 0),
         distance=15,
         elevation=-45,
         # trackbodyid=self.sim.model.body_name2id('torso'),
     )
     self.presampled_qpos = np.load(
         get_asset_full_path(presampled_positions))
     self._goal = None
     idx = random.randint(0, len(self.presampled_qpos) - 1)
     self.goal = self.presampled_qpos[idx]
Example #9
 def model_name(self):
     # return get_asset_full_path(
     #     'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden.xml'
     # )
     return get_asset_full_path(
         'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden.xml'
     )
Example #10
    def __init__(
        self,
        use_low_gear_ratio=False,
        include_contact_forces_in_state=True,
        xml_path=None,
        max_steps=250,
    ):
        self.quick_init(locals())
        if xml_path is None:
            if use_low_gear_ratio:
                xml_path = 'classic_mujoco/low_gear_ratio_ant.xml'
            else:
                xml_path = 'classic_mujoco/ant.xml'
        super().__init__(
            get_asset_full_path(xml_path),
            frame_skip=5,
        )
        self.include_contact_forces_in_state = include_contact_forces_in_state
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = Box(low, high)
        obs_size = self._get_env_obs().shape[0]
        high = np.inf * np.ones(obs_size)
        low = -high
        self.observation_space = Box(low, high)

        self.max_steps = max_steps
        self._current_steps = 0
Example #11
    def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
    ):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(
            self,
            self.model_name,
            hand_low=hand_low,
            hand_high=hand_high,
            **sawyer_xyz_kwargs
        )
        MultitaskEnv.__init__(self)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low), np.array(goal_high))
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.state_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        self.reset()
Example #12
    def model_name(self):

        #return get_asset_full_path('sawyer_xyz/sawyer_pickPlace.xml')
        #self.reset_mocap_quat = zangle_to_quat(np.pi/2) #this is the reset_mocap_quat for wsg grippers

        #self.reset_mocap_quat = zangle_to_quat(-np.pi/2)

        init_quat = [1, 0, 0, 1]

        self.reset_mocap_quat = (Quaternion(axis=[1, 0, 0], angle=-np.pi / 2) *
                                 Quaternion(init_quat)).elements
        return get_asset_full_path('sawyer_xyz/sawyer_wsg_coffee.xml')
Example #13
    def __init__(self, tasks=[{'goalPos': [0.2, 0]}], mode_1d=False,
                 init_pos=[0, 0], goal_pos=[0.2, 0], mpl=200,
                 change_task_every_episode=False, *args, **kwargs):

        self.quick_init(locals())

        model_name = get_asset_full_path('pointMass/point.xml')
        self.obj_init_pos = init_pos

        self.tasks = np.array(tasks)
        self.num_tasks = len(self.tasks)
        self.task = self.tasks[0]
        self.curr_path_length = 0
        self.max_path_length = mpl
        self.mode_1d = mode_1d
        self.change_task_every_episode = change_task_every_episode

        MujocoEnv.__init__(self, model_name, frame_skip=1, automatically_set_spaces=True)
        # Serializable.__init__(self, *args, **kwargs)

        self.info_logKeys = ['targetDist']
Example #14
    def model_name(self):

        return get_asset_full_path(
            'sawyer_xyz/sawyer_button_press_topdown_wall.xml')
Example #15
    def __init__(self,
                 block_low=(-0.2, 0.55, 0.02),
                 block_high=(0.2, 0.75, 0.02),
                 hand_low=(0.0, 0.55, 0.3),
                 hand_high=(0.0, 0.55, 0.3),
                 stack_goal_low=(-0.2, 0.55, 0.02),
                 stack_goal_high=(0.2, 0.75, 0.02),
                 fix_goal=False,
                 fixed_stack_goal=(0.0, 0.55, 0.02),
                 fixed_hand_goal=(0.0, 0.75, 0.3),
                 use_sparse_reward=False,
                 sparse_reward_threshold=0.05,
                 reset_free=False,
                 hide_goal_markers=False,
                 oracle_reset_prob=0.0,
                 xml_path='sawyer_xyz/three_blocks.xml',
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              model_name=get_asset_full_path(xml_path),
                              **kwargs)

        self.block_low = np.array(block_low)
        self.block_high = np.array(block_high)
        self.block_radius = 0.02

        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        self.stack_goal_low = np.array(stack_goal_low)
        self.stack_goal_high = np.array(stack_goal_high)

        self.max_place_distance = max(
            np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
            np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
        )

        self.fix_goal = fix_goal
        self.fixed_stack_goal = np.array(fixed_stack_goal)

        self.use_sparse_reward = use_sparse_reward
        self.sparse_reward_threshold = sparse_reward_threshold

        self.reset_free = reset_free
        self.hide_goal_markers = hide_goal_markers
        self.oracle_reset_prob = oracle_reset_prob

        self.action_space = Box(np.array([-1, -1, -1, -1]),
                                np.array([1, 1, 1, 1]),
                                dtype=np.float32)

        diff = (self.block_high[0] - self.block_low[0]) / 3.0
        midpoint = diff + self.block_low[0]
        block_one_high = [midpoint, *self.block_high[1:]]
        block_two_low = [midpoint, *self.block_low[1:]]
        midpoint = midpoint + diff
        block_two_high = [midpoint, *self.block_high[1:]]
        block_three_low = [midpoint, *self.block_low[1:]]

        self.sampling_space = Box(np.hstack(
            ([0.0], self.hand_low, self.block_low, block_two_low,
             block_three_low)),
                                  np.hstack(
                                      ([0.04], self.hand_high, block_one_high,
                                       block_two_high, self.block_high)),
                                  dtype=np.float32)

        self.gripper_and_hand_and_blocks_space = Box(
            np.hstack(([0.0], self.hand_low, self.block_low, self.block_low,
                       self.block_low)),
            np.hstack(([0.04], self.hand_high, self.block_high,
                       self.block_high, self.block_high)),
            dtype=np.float32)

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_blocks_space),
            ('desired_goal', self.gripper_and_hand_and_blocks_space),
            ('achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('state_observation', self.gripper_and_hand_and_blocks_space),
            ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_observation', self.gripper_and_hand_and_blocks_space),
            ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
            ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ])

        self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
        self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
        self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
        self.reset()
Example #16
 def model_name(self):
     return get_asset_full_path('classic_mujoco/hopper.xml')
Example #17
    def __init__(self,
                 puck_low=(-.4, .2),
                 puck_high=(.4, 1),
                 reward_type='state_distance',
                 norm_order=1,
                 indicator_threshold=0.06,
                 hand_low=(-0.28, 0.3, 0.05),
                 hand_high=(0.28, 0.9, 0.3),
                 fix_goal=False,
                 fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
                 goal_low=(-0.25, 0.3, 0.02, -.2, .4),
                 goal_high=(0.25, 0.875, 0.02, .2, .8),
                 hide_goal_markers=False,
                 init_puck_z=0.02,
                 init_hand_xyz=(0, 0.4, 0.07),
                 reset_free=False,
                 xml_path='sawyer_xyz/sawyer_push_puck.xml',
                 clamp_puck_on_step=False,
                 puck_radius=.07,
                 **kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(self,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              model_name=self.model_name,
                              **kwargs)
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        self.puck_low = puck_low
        self.puck_high = puck_high

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                       np.hstack((self.hand_high, puck_high)),
                                       dtype=np.float32)
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.init_puck_z = init_puck_z
        self.init_hand_xyz = np.array(init_hand_xyz)
        self._set_puck_xy(self.sample_puck_xy())
        self.reset_free = reset_free
        self.reset_counter = 0
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.clamp_puck_on_step = clamp_puck_on_step
        self.puck_radius = puck_radius
        self.reset()
Example #18
 def model_name(self):
     return get_asset_full_path('sawyer_xyz/sawyer_reach.xml')
Example #19
    def __init__(
            self,
            puck_low=(-.35, .25),
            puck_high=(.35, .95),

            norm_order=1,
            indicator_threshold=0.06,
            touch_threshold=0.1,  # I just chose this number after doing a few runs and looking at a histogram

            hand_low=(-0.28, 0.3, 0.05),
            hand_high=(0.28, 0.9, 0.3),

            fix_goal=True,
            fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
            goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
            goal_high=(0.25, 0.875, 0.02, .2, .8),

            init_puck_z=0.035,
            init_hand_xyz=(0, 0.4, 0.07),

            reset_free=False,
            xml_path='sawyer_xyz/sawyer_push_puck.xml',
            clamp_puck_on_step=False,

            puck_radius=.07,
            **kwargs
    ):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)

        self.goal_type = kwargs.pop('goal_type', 'puck')
        self.dense_reward = kwargs.pop('dense_reward', False)
        self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
        self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
        self.task_agnostic = kwargs.pop('task_agnostic', False)
        self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
        self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))

        MultitaskEnv.__init__(self)
        SawyerXYZEnv.__init__(
            self,
            hand_low=hand_low,
            hand_high=hand_high,
            model_name=self.model_name,
            **kwargs
        )
        if puck_low is None:
            puck_low = self.hand_low[:2]
        if puck_high is None:
            puck_high = self.hand_high[:2]

        self.puck_low = np.array(puck_low)
        self.puck_high = np.array(puck_high)

        if goal_low is None:
            goal_low = np.hstack((self.hand_low, puck_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, puck_high))
        self.goal_low = np.array(goal_low)
        self.goal_high = np.array(goal_high)

        self.norm_order = norm_order
        self.touch_threshold = touch_threshold
        self.fix_goal = fix_goal

        self._state_goal = None

        self.hide_goal_markers = self.task_agnostic

        self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
        self.hand_and_puck_space = Box(
            np.hstack((self.hand_low, puck_low)),
            np.hstack((self.hand_high, puck_high)),
            dtype=np.float32
        )
        self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.hand_and_puck_space),
            ('desired_goal', self.hand_and_puck_space),
            ('achieved_goal', self.hand_and_puck_space),
            ('state_observation', self.hand_and_puck_space),
            ('state_desired_goal', self.hand_and_puck_space),
            ('state_achieved_goal', self.hand_and_puck_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])

        self.init_puck_z = init_puck_z
        self._set_puck_xy(self.sample_puck_xy())
        self.reset_free = reset_free
        self.reset_counter = 0
        self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
        self.clamp_puck_on_step = clamp_puck_on_step
        self.puck_radius = puck_radius
        self.reset()
Example #20
 def model_name(self):
     return get_asset_full_path('sawyer_xyz/sawyer_dishRack.xml')
Example #21
save_every = 10
render = False
positions = []
velocities = []
for _ in range(num_rollouts):
    env.reset()
    start_xy = sample_start_xy()
    # import ipdb; ipdb.set_trace()
    state = env.sim.get_state()
    state.qpos[:2] = start_xy
    env.sim.set_state(state)
    # env.sim.model.geom_pos[1:, 0:2] += start_xy
    for t in range(num_steps_per_rollout):
        env.step(env.action_space.sample())
        if t % save_every == 0:
            state = env.sim.get_state()
            positions.append(state.qpos.copy()[:15])
            velocities.append(state.qvel.copy()[:14])
        if render:
            env.render()

print(len(positions))
positions = np.array(positions)
positions_path = get_asset_full_path('classic_mujoco/ant_goal_qpos_5x5_xy.npy')
np.save(positions_path, positions)

velocities = np.array(velocities)
velocities_path = get_asset_full_path(
    'classic_mujoco/ant_goal_qvel_5x5_xy.npy')
np.save(velocities_path, velocities)
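The arrays written above are the same presampled goals that Example #8 reloads at construction time. A short sketch of that consumer side, assuming the file produced by this script (one saved qpos per row):

import random

import numpy as np

# Reload the presampled qpos goals and pick one at random,
# mirroring what Example #8 does in its constructor.
presampled_qpos = np.load(
    get_asset_full_path('classic_mujoco/ant_goal_qpos_5x5_xy.npy'))
goal = presampled_qpos[random.randint(0, len(presampled_qpos) - 1)]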
Example #22
 def model_name(self):
     return get_asset_full_path(
         'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_horizontal.xml'
     )
Example #23
    def model_name(self):

        return get_asset_full_path('sawyer_xyz/sawyer_window_horizontal.xml')
Example #24
 def model_name(self):
     return get_asset_full_path('classic_mujoco/walker2d.xml')
Example #25
    def __init__(self,
                 goal_low=(0, ),
                 goal_high=(1.0472, ),
                 action_reward_scale=0,
                 reward_type='angle_difference',
                 indicator_threshold=(.02, .03),
                 fix_goal=False,
                 fixed_goal=(-.25,),
                 reset_free=False,
                 fixed_hand_z=0.12,
                 hand_low=(-0.1, 0.45, 0.15),
                 hand_high=(0., 0.65, .225),
                 target_pos_scale=1,
                 target_angle_scale=1,
                 min_angle=0,
                 max_angle=1.0472,
                 xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
                 **sawyer_xyz_kwargs):
        self.quick_init(locals())
        self.model_name = get_asset_full_path(xml_path)
        SawyerXYZEnv.__init__(self,
                              self.model_name,
                              hand_low=hand_low,
                              hand_high=hand_high,
                              **sawyer_xyz_kwargs)
        MultitaskEnv.__init__(self)
        # self.initialize_camera(camera)
        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(np.array(goal_low),
                              np.array(goal_high),
                              dtype=np.float32)
        self._state_goal = None
        self.fixed_hand_z = fixed_hand_z

        self.action_space = Box(np.array([-1, -1, -1]),
                                np.array([1, 1, 1]),
                                dtype=np.float32)
        self.state_space = Box(
            np.concatenate((hand_low, [min_angle])),
            np.concatenate((hand_high, [max_angle])),
            dtype=np.float32,
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space),
        ])
        self.action_reward_scale = action_reward_scale
        self.target_pos_scale = target_pos_scale
        self.target_angle_scale = target_angle_scale
        self.reset_free = reset_free
        self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
        #ensure env does not start in weird positions
        self.reset_free = True
        self.reset()
        self.reset_free = reset_free
Example #26
    def __init__(
            self,
            reward_type='dense',
            count_bonus_coeff=None,
            n_bins=16,
            target_radius=0.5,  # Required distance to goal when using sparse reward
            norm_order=2,
            frame_skip=5,
            two_frames=False,
            vel_in_state=True,
            ant_low=list([-6, -6]),
            ant_high=list([6, 6]),
            goal_low=list([-6, -6]),
            goal_high=list([6, 6]),
            model_path='classic_mujoco/normal_gear_ratio_ant.xml',
            use_low_gear_ratio=False,
            goal_is_xy=False,
            goal_is_qpos=False,
            init_qpos=None,
            fixed_goal=None,
            diagnostics_goal=None,
            init_xy_mode='corner',
            terminate_when_unhealthy=False,
            healthy_z_range=(0.2, 0.9),
            # health_reward=10,
            done_penalty=0,
            goal_sampling_strategy='uniform',
            presampled_goal_paths='',
            *args,
            **kwargs):
        assert init_xy_mode in {
            'corner',
            'sample-uniformly-xy-space',
            'sample-from-goal-space',  # soon to be deprecated
        }
        assert not goal_is_xy or not goal_is_qpos
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        if use_low_gear_ratio:
            model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path(model_path),
                           frame_skip=frame_skip,
                           **kwargs)
        if goal_is_xy:
            assert reward_type.startswith('xy')

        if init_qpos is not None:
            self.init_qpos[:len(init_qpos)] = np.array(init_qpos)

        self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
        self.reward_type = reward_type
        self.count_bonus_coeff = count_bonus_coeff
        self.n_bins = n_bins
        self.bin_counts = np.ones((self.n_bins, self.n_bins))
        self.target_radius = target_radius
        self.norm_order = norm_order
        self.goal_is_xy = goal_is_xy
        self.goal_is_qpos = goal_is_qpos
        self.fixed_goal = fixed_goal
        self.diagnostics_goal = diagnostics_goal
        print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
        self.init_xy_mode = init_xy_mode
        self.terminate_when_unhealthy = terminate_when_unhealthy
        # self._healthy_reward = health_reward
        self._done_penalty = done_penalty
        self._healthy_z_range = healthy_z_range

        self.model_path = model_path
        assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
        self.goal_sampling_strategy = goal_sampling_strategy
        if self.goal_sampling_strategy == 'presampled':
            assert presampled_goal_paths is not None
            if not osp.exists(presampled_goal_paths):
                presampled_goal_paths = get_asset_full_path(
                    presampled_goal_paths)
            self.presampled_goals = np.load(presampled_goal_paths)
        else:
            self.presampled_goals = None

        self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
        goal_low, goal_high = np.array(goal_low), np.array(goal_high)
        self.two_frames = two_frames
        self.vel_in_state = vel_in_state
        if self.vel_in_state:
            obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
            obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
            if goal_is_xy:
                goal_space_low = goal_low
                goal_space_high = goal_high
            else:
                goal_space_low = np.concatenate((goal_low, -np.ones(27)))
                goal_space_high = np.concatenate((goal_high, np.ones(27)))
        else:
            obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
            obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
            if goal_is_xy:
                goal_space_low = goal_low
                goal_space_high = goal_high
            else:
                goal_space_low = np.concatenate((goal_low, -np.ones(13)))
                goal_space_high = np.concatenate((goal_high, np.ones(13)))

        if self.two_frames:
            self.obs_space = Box(np.concatenate(
                (obs_space_low, obs_space_low)),
                                 np.concatenate(
                                     (obs_space_high, obs_space_high)),
                                 dtype=np.float32)
            self.goal_space = Box(np.concatenate(
                (goal_space_low, goal_space_low)),
                                  np.concatenate(
                                      (goal_space_high, goal_space_high)),
                                  dtype=np.float32)
        else:
            self.obs_space = Box(obs_space_low,
                                 obs_space_high,
                                 dtype=np.float32)
            self.goal_space = Box(goal_space_low,
                                  goal_space_high,
                                  dtype=np.float32)
        qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))

        spaces = [
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space),
        ]
        if self.goal_is_xy:
            self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
            spaces += [
                ('xy_observation', self.xy_obs_space),
                ('xy_desired_goal', self.xy_obs_space),
                ('xy_achieved_goal', self.xy_obs_space),
            ]
        if self.goal_is_qpos:
            spaces += [
                ('qpos_desired_goal', qpos_space),
                ('qpos_achieved_goal', qpos_space),
            ]

        self.observation_space = Dict(spaces)

        self._full_state_goal = None
        self._xy_goal = None
        self._qpos_goal = None
        self._prev_obs = None
        self._cur_obs = None
Example #27
 def model_name(self):
     return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
Example #28
    def model_name(self):
        #Remember to set the right limits in the base file (right line needs to be commented out)

        self.reset_mocap_quat = [1, 0, 1, 0]
        return get_asset_full_path('sawyer_xyz/multi_domain.xml')
Example #29
 def model_name(self):
     return get_asset_full_path('classic_mujoco/half_cheetah.xml')
Example #30
    def model_name(self):

        return get_asset_full_path(
            'sawyer_xyz/sawyer_reach_push_pick_and_place_wall.xml')