Code example #1
    def __init__(self,
                 reward_type='dense',
                 norm_order=2,
                 action_scale=0.15,
                 frame_skip=100,
                 ball_low=(-4.0, -4.0),
                 ball_high=(4.0, 4.0),
                 goal_low=(-4.0, -4.0),
                 goal_high=(4.0, 4.0),
                 model_path='pointmass_u_wall_big.xml',
                 reset_low=None,
                 reset_high=None,
                 v_func_heatmap_bounds=(-2.0, 0.0),
                 *args,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path('pointmass/' +
                                                          model_path),
                           frame_skip=frame_skip,
                           **kwargs)

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.action_scale = action_scale
        self.ball_radius = 0.50
        self.walls = [
            Wall(0, 1.0, 2.5, 0.5, self.ball_radius),
            Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius),
            Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius),
        ]

        self.reward_type = reward_type
        self.norm_order = norm_order

        self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high)
        self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)

        if reset_low is None:
            self.reset_low = np.array(ball_low)
        else:
            self.reset_low = np.array(reset_low)
        if reset_high is None:
            self.reset_high = np.array(ball_high)
        else:
            self.reset_high = np.array(reset_high)

        obs_space_low = np.copy(self.ball_low)
        obs_space_high = np.copy(self.ball_high)
        goal_space_low = np.copy(self.goal_low)
        goal_space_high = np.copy(self.goal_high)

        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low,
                              goal_space_high,
                              dtype=np.float32)

        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.obs_space),
            ('proprio_desired_goal', self.goal_space),
            ('proprio_achieved_goal', self.obs_space),
        ])

        self.v_func_heatmap_bounds = v_func_heatmap_bounds

        self._state_goal = None
        self.reset()
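
Every constructor in these examples follows the same pattern: build Box spaces for the state and the goal, then expose them under the keys of a goal-conditioned Dict observation space. Below is a self-contained sketch of that pattern using the default bounds from code example #1; the gym.spaces imports are an assumption, since the snippets use bare Box, Dict, and np without showing their imports.

# Minimal sketch of the goal-conditioned Dict space pattern used above.
# Assumes gym.spaces.Box / Dict, matching the bare Box/Dict names in the snippets.
import numpy as np
from gym.spaces import Box, Dict

obs_space = Box(np.array([-4.0, -4.0]), np.array([4.0, 4.0]), dtype=np.float32)
goal_space = Box(np.array([-4.0, -4.0]), np.array([4.0, 4.0]), dtype=np.float32)
observation_space = Dict([
    ('observation', obs_space),
    ('desired_goal', goal_space),
    ('achieved_goal', obs_space),
])

sample = observation_space.sample()  # OrderedDict with one array per key
print(sample['observation'].shape)   # (2,)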
Code example #2
    def __init__(self,
                 reward_info=None,
                 frame_skip=50,
                 pos_action_scale=2. / 100,
                 randomize_goals=True,
                 hide_goal=False,
                 init_block_low=(-0.02, 0.54),
                 init_block_high=(-0.02, 0.54),
                 puck_goal_low=(-0.05, 0.57),
                 puck_goal_high=(0.05, 0.65),
                 hand_goal_low=(-0.05, 0.55),
                 hand_goal_high=(0.05, 0.65),
                 fixed_puck_goal=(0.05, 0.6),
                 fixed_hand_goal=(-0.05, 0.6),
                 mocap_low=(-0.1, 0.5, 0.0),
                 mocap_high=(0.1, 0.7, 0.5),
                 force_puck_in_goal_space=False,
                 puckInvisible=False):
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.hide_goal = hide_goal
        self.puckInvisible = puckInvisible

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.force_puck_in_goal_space = force_puck_in_goal_space

        self._goal_xyxy = self.sample_goal_xyxy()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )
        self.obs_box = Box(
            np.array([-0.2, 0.5, -0.2, 0.5]),
            np.array([0.2, 0.7, 0.2, 0.7]),
        )
        goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.reset()
        self.reset_mocap_welds()
Code example #3
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            hand_low=(-0.1, 0.55, 0.05),
            hand_high=(0.05, 0.65, 0.2),

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            frame_skip=50,
            obj_init_positions=((0, 0.6, 0.02),),
            random_init=False,

            num_objects=1,
            fix_goal=False,

            mocap_low=(-0.13, 0.57, 0.04),
            mocap_high=(0.08, 0.73, 0.2),
            action_scale=0.02,

            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,
            reset_free=False,
            filename='sawyer_pick_and_place_multiobj.xml',
            object_mass=1,
            # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
            object_meshes=None,
            obj_classname=None,
            block_height=0.04,
            block_width=0.04,
            cylinder_radius=0.015,
            finger_sensors=False,
            maxlen=0.06,
            minlen=0.01,
            preload_obj_dict=None,
            hide_goal_markers=False,
            oracle_reset_prob=0.0,
            presampled_goals=None,
            num_goals_presampled=2,
            p_obj_in_hand=1,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)


        base_filename = asset_base_path + filename
        friction_params = (1, 1, 2)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius)

        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        self.reset_mocap_welds()

        clean_xml(gen_xml)

        self.mocap_low = mocap_low
        self.mocap_high = mocap_high
        self.action_scale = action_scale


        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.random_init = random_init
        self.p_obj_in_hand = p_obj_in_hand
        self.indicator_threshold = indicator_threshold

        self.obj_init_z = obj_init_positions[0][2]
        self.obj_init_positions = np.array(obj_init_positions)
        self.last_obj_pos = self.obj_init_positions[0]

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.reset_free = reset_free
        self.oracle_reset_prob = oracle_reset_prob

        self.obj_ind_to_manip = 0

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack(([0.0], self.hand_low, obj_low)),
            np.hstack(([0.04], self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.num_objects = num_objects
        self.maxlen = maxlen

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.hand_reset_pos = np.array([0, .6, .2])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.num_goals_presampled = 10  # note: unconditionally overrides the value assigned above
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'
        self.reset()
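
When presampled_goals is passed, the constructor above counts the goals with len(list(self._presampled_goals.values())[0]), which implies a dict of equally sized arrays keyed by the goal keys. A sketch of that assumed layout follows; the key names and dimensions are inferred from the spaces defined above with the default num_objects=1, and the exact keys used elsewhere are not shown in the snippet.

# Hypothetical presampled_goals layout inferred from the __init__ above:
# a dict of arrays that share the same leading (goal-count) dimension.
import numpy as np

num_goals = 2
presampled_goals = {
    'desired_goal': np.zeros((num_goals, 6)),          # hand (3) + object (3)
    'state_desired_goal': np.zeros((num_goals, 6)),
    'proprio_desired_goal': np.zeros((num_goals, 3)),  # hand only
}
assert len(list(presampled_goals.values())[0]) == num_goals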
Code example #4
File: base.py  Project: scottemmons/gcsl
    def __init__(self, model_name, frame_skip=50):
        MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
        self.reset_mocap_welds()
Code example #5
    def __init__(
        self,
        reward_info=None,
        frame_skip=50,
        pos_action_scale=4. / 100,
        randomize_goals=True,
        puck_goal_low=(-0.1, 0.5),
        puck_goal_high=(0.1, 0.7),
        hand_goal_low=(-0.1, 0.5),
        hand_goal_high=(0.1, 0.7),
        mocap_low=(-0.1, 0.5, 0.0),
        mocap_high=(0.1, 0.7, 0.5),
        # unused
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        fixed_puck_goal=(0.05, 0.6),
        fixed_hand_goal=(-0.05, 0.6),
        # multi-object
        num_objects=1,
        fixed_colors=True,
        seed=None,
        filename='sawyer_multiobj.xml',
        object_mass=1,
        # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
        object_meshes=None,
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.05,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        reset_to_initial_position=True,
        object_low=(-np.inf, -np.inf, -np.inf),
        object_high=(np.inf, np.inf, np.inf),
        action_repeat=1,
        fixed_start=True,
        fixed_start_pos=(0, 0.6),
        goal_moves_one_object=False,
        num_scene_objects=None,  # list of number of objects that can appear per scene
        object_height=0.02,
        use_textures=False,
        init_camera=None,
        sliding_joints=False,
    ):
        if seed:
            np.random.seed(seed)
            self.env_seed = seed
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.reset_to_initial_position = reset_to_initial_position

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.object_low = np.array(object_low)
        self.object_high = np.array(object_high)
        self.action_repeat = action_repeat
        self.fixed_colors = fixed_colors
        self.goal_moves_one_object = goal_moves_one_object

        self.num_objects = num_objects
        self.num_scene_objects = num_scene_objects
        self.object_height = object_height
        self.fixed_start = fixed_start
        self.fixed_start_pos = np.array(fixed_start_pos)
        self.maxlen = maxlen
        self.use_textures = use_textures
        self.sliding_joints = sliding_joints
        self.cur_objects = [0] * num_objects
        self.preload_obj_dict = preload_obj_dict

        self.num_cur_objects = 0
        # Generate XML
        base_filename = asset_base_path + filename
        friction_params = (0.1, 0.1, 0.02)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius,
            use_textures, sliding_joints)
        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        clean_xml(gen_xml)

        if self.use_textures:
            self.modder = TextureModder(self.sim)

        self.state_goal = self.sample_goal_for_rollout()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )

        self.num_objects = num_objects
        # Bounds cover the hand plus num_scene_objects[0] objects (x, y each);
        # num_scene_objects is the per-scene object-count list documented above.
        low = (self.num_scene_objects[0] + 1) * [-0.2, 0.5]
        high = (self.num_scene_objects[0] + 1) * [0.2, 0.7]
        self.obs_box = Box(
            np.array(low),
            np.array(high),
        )
        goal_low = np.array(low)  # np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.array(high)  # np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.total_objects = self.num_objects + 1
        self.objects_box = Box(
            np.zeros((self.total_objects, )),
            np.ones((self.total_objects, )),
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
            ('objects', self.objects_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.set_initial_object_positions()

        if use_textures:
            super().initialize_camera(init_camera)
            self.initialized_camera = init_camera

        self.reset()
        self.reset_mocap_welds()
Code example #6
File: sawyer_push_nips.py  Project: wuyx/LeapPaper
    def __init__(
        self,
        frame_skip=50,
        action_scale=2. / 100,
        hand_low=(-0.2, 0.5),
        hand_high=(0.2, 0.7),
        puck_low=(-0.2, 0.5),
        puck_high=(0.2, 0.7),
        fix_goal=False,
        sample_realistic_goals=False,
        fixed_goal=(-0.05, 0.6, 0.05, 0.6),
        goal_low=None,
        goal_high=None,
        fix_reset=False,
        fixed_reset=(0, 0.55, 0.0, 0.65),
        reset_low=None,
        reset_high=None,
        hand_z_position=0.06,
        puck_z_position=0.02,
        reward_type='state_distance',
        norm_order=2,
        indicator_threshold=0.06,
        num_mocap_calls_for_reset=250,
        square_puck=False,
        heavy_puck=False,
        invisible_boundary_wall=False,
        indicator_threshold_2=0.08,
        indicator_threshold_3=0.12,
        test_mode_case_num=None,
    ):
        self.quick_init(locals())

        self.square_puck = square_puck
        self.heavy_puck = heavy_puck
        self.invisible_boundary_wall = invisible_boundary_wall

        if self.invisible_boundary_wall:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_wall.xml')
        elif self.square_puck and self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_square_heavy.xml')
        elif self.square_puck and not self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_square.xml')
        elif not self.square_puck and self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_heavy.xml')
        else:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips.xml')

        MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)

        hand_low = np.array(hand_low)
        hand_high = np.array(hand_high)
        mocap_low = hand_low
        mocap_high = hand_high
        self.mocap_low = np.hstack((mocap_low, np.array([0.0])))
        self.mocap_high = np.hstack((mocap_high, np.array([0.5])))
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        if self.square_puck:
            self.puck_radius = np.sqrt(2) * 0.04
        else:
            self.puck_radius = 0.04

        self.ee_radius = 0.015

        # puck_low += (self.puck_radius + self.ee_radius)
        # puck_high -= (self.puck_radius + self.ee_radius)
        # print(hand_low)
        # print(hand_high)
        # print(puck_low)
        # print(puck_high)

        self.obs_space = Box(np.hstack((hand_low, puck_low)),
                             np.hstack((hand_high, puck_high)),
                             dtype=np.float32)
        self.hand_space = Box(hand_low, hand_high, dtype=np.float32)
        self.puck_space = Box(puck_low, puck_high, dtype=np.float32)
        if goal_low is None:
            goal_low = self.obs_space.low.copy()
        if goal_high is None:
            goal_high = self.obs_space.high.copy()
        if reset_low is None:
            reset_low = self.obs_space.low.copy()
        if reset_high is None:
            reset_high = self.obs_space.high.copy()
        goal_low = np.array(goal_low)
        goal_high = np.array(goal_high)
        reset_low = np.array(reset_low)
        reset_high = np.array(reset_high)
        self.goal_space = Box(goal_low, goal_high, dtype=np.float32)
        self.reset_space = Box(reset_low, reset_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal',
             Box(goal_low[:2], goal_high[:2], dtype=np.float32)),
            ('proprio_achieved_goal', self.hand_space),
        ])

        self.num_mocap_calls_for_reset = num_mocap_calls_for_reset

        self.fix_reset = fix_reset
        self.sample_realistic_goals = sample_realistic_goals
        self.fixed_reset = np.array(fixed_reset)
        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.test_mode_case_num = test_mode_case_num
        assert self.test_mode_case_num in [None, 1, 2, 3, 4, 5]
        if self.test_mode_case_num is not None:
            self.fix_reset = True
            self.fix_goal = True
        if self.test_mode_case_num == 1:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([-0.20, 0.50, 0.20, 0.70])
        elif self.test_mode_case_num == 2:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.15, 0.65, 0.20, 0.70])
        elif self.test_mode_case_num == 3:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.70, -0.20, 0.70])
        elif self.test_mode_case_num == 4:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, -0.20, 0.70])
        elif self.test_mode_case_num == 5:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, 0.20, 0.70])

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold
        self.indicator_threshold_2 = indicator_threshold_2
        self.indicator_threshold_3 = indicator_threshold_3

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self._action_scale = action_scale
        self.hand_z_position = hand_z_position
        self.puck_z_position = puck_z_position
        self.reset_counter = 0
        self.reset()
Code example #7
    def __init__(
            self,
            reward_info=None,
            frame_skip=50,
            pos_action_scale=2. / 100,
            randomize_goals=True,
            puck_goal_low=(-0.1, 0.5),
            puck_goal_high=(0.1, 0.7),
            hand_goal_low=(-0.1, 0.5),
            hand_goal_high=(0.1, 0.7),
            mocap_low=(-0.1, 0.5, 0.0),
            mocap_high=(0.1, 0.7, 0.5),
            # unused
            init_block_low=(-0.05, 0.55),
            init_block_high=(0.05, 0.65),
            fixed_puck_goal=(0.05, 0.6),
            fixed_hand_goal=(-0.05, 0.6),
            # multi-object
            num_objects=1,
            filename='sawyer_multiobj.xml',
            object_mass=1,
            # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
            object_meshes=None,
            obj_classname=None,
            block_height=0.02,
            block_width=0.02,
            cylinder_radius=0.05,
            finger_sensors=False,
            maxlen=0.06,
            minlen=0.01,
            preload_obj_dict=None,
            reset_to_initial_position=True,
            object_low=(-0.1, 0.5, 0.0),
            object_high=(0.1, 0.7, 0.5),
            action_repeat=1,
            fixed_start=True,
    ):
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.reset_to_initial_position = reset_to_initial_position

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.object_low = np.array(object_low)
        self.object_high = np.array(object_high)
        self.action_repeat = action_repeat

        self.num_objects = num_objects
        self.fixed_start = fixed_start
        self.maxlen = maxlen

        # Generate XML
        base_filename = asset_base_path + filename
        friction_params = (0.1, 0.1, 0.02)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius)
        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        clean_xml(gen_xml)

        self.state_goal = self.sample_goal_xyxy()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )

        self.num_objects = num_objects
        low = (self.num_objects + 1) * [-0.2, 0.5]
        high = (self.num_objects + 1) * [0.2, 0.7]
        self.obs_box = Box(
            np.array(low),
            np.array(high),
        )
        goal_low = np.array(low)  # np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.array(high)  # np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.set_initial_object_positions()

        self.reset()
        self.reset_mocap_welds()
Code example #8
File: wheeled_car.py  Project: wuyx/LeapPaper
    def __init__(
            self,
            reward_type='state_distance',
            norm_order=2,
            action_scale=20,
            frame_skip=3,
            two_frames=False,
            vel_in_state=True,
            z_in_state=True,
            car_low=(-1.90, -1.90),  # list([-1.60, -1.60])
            car_high=(1.90, 1.90),  # list([1.60, 1.60])
            goal_low=(-1.90, -1.90),  # list([-1.60, -1.60])
            goal_high=(1.90, 1.90),  # list([1.60, 1.60])
            model_path='wheeled_car.xml',
            reset_low=None,
            reset_high=None,
            *args,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path('locomotion/' +
                                                          model_path),
                           frame_skip=frame_skip,
                           **kwargs)

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.action_scale = action_scale

        # set radius
        if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
            self.car_radius = np.sqrt(2) * 0.34
        elif model_path == 'wheeled_car_old.xml':
            self.car_radius = 0.30
        else:
            raise NotImplementedError

        # set walls
        if model_path == 'wheeled_car_u_wall.xml':
            self.walls = [
                Wall(0, -0.5, 0.75, 0.05, self.car_radius),
                Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
                Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
            ]
        else:
            self.walls = []

        self.reward_type = reward_type
        self.norm_order = norm_order

        self.car_low, self.car_high = np.array(car_low), np.array(car_high)
        self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)

        if reset_low is None:
            self.reset_low = np.array(car_low)
        else:
            self.reset_low = np.array(reset_low)
        if reset_high is None:
            self.reset_high = np.array(car_high)
        else:
            self.reset_high = np.array(reset_high)

        self.car_low += self.car_radius
        self.car_high -= self.car_radius
        self.goal_low += self.car_radius
        self.goal_high -= self.car_radius
        self.reset_low += self.car_radius
        self.reset_high -= self.car_radius

        self.two_frames = two_frames
        self.vel_in_state = vel_in_state
        self.z_in_state = z_in_state

        # x and y pos
        obs_space_low = np.copy(self.car_low)
        obs_space_high = np.copy(self.car_high)
        goal_space_low = np.copy(self.goal_low)
        goal_space_high = np.copy(self.goal_high)

        # z pos
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-1]))
            obs_space_high = np.concatenate((obs_space_high, [0.03]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        # sin and cos
        obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
        obs_space_high = np.concatenate((obs_space_high, [1, 1]))
        goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
        goal_space_high = np.concatenate((goal_space_high, [1, 1]))

        if self.vel_in_state:
            # x and y vel
            obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
            obs_space_high = np.concatenate((obs_space_high, [10, 10]))
            goal_space_low = np.concatenate((goal_space_low, [0, 0]))
            goal_space_high = np.concatenate((goal_space_high, [0, 0]))

            # z vel
            if self.z_in_state:
                obs_space_low = np.concatenate((obs_space_low, [-10]))
                obs_space_high = np.concatenate((obs_space_high, [10]))
                goal_space_low = np.concatenate((goal_space_low, [0]))
                goal_space_high = np.concatenate((goal_space_high, [0]))

            # theta vel
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low,
                              goal_space_high,
                              dtype=np.float32)
        if self.two_frames:
            self.obs_space = concatenate_box_spaces(self.obs_space,
                                                    self.obs_space)
            self.goal_space = concatenate_box_spaces(self.goal_space,
                                                     self.goal_space)

        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.obs_space),
            ('proprio_desired_goal', self.goal_space),
            ('proprio_achieved_goal', self.obs_space),
        ])

        self._state_goal = None
        self._cur_obs = None
        self._prev_obs = None
        self.reset()
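
With two_frames=True the constructor above doubles both spaces via concatenate_box_spaces, so the observation holds the current and previous frame side by side. The helper's real definition lives in the surrounding codebase; the sketch below is only an assumption inferred from how it is called.

# Assumed behaviour of concatenate_box_spaces, inferred from its usage above:
# stack the bounds of two Box spaces end to end.
import numpy as np
from gym.spaces import Box

def concatenate_box_spaces(space1, space2):
    return Box(np.concatenate((space1.low, space2.low)),
               np.concatenate((space1.high, space2.high)),
               dtype=np.float32)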
Code example #9
    def __init__(
            self,
            # Room
            room,
            # Start and Goal
            start_config="all",
            goal_config="all",
            # Reward
            potential_type='',
            shaped=False,
            base_reward='',
            # State and Goal Representations
            state_embedding=None,
            goal_embedding=None,
            max_path_length=None,
            *args,
            **kwargs):

        # Initialize
        Serializable.quick_init(self, locals())
        MultitaskEnv.__init__(self)

        # Environment Configuration
        self._room = room
        model = self.room.get_mjcmodel()
        self.possible_positions = self.room.XY(n=50)

        with model.asfile() as f:
            MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

        # Initialization
        self.start_config = start_config
        self.baseline_start = self.room.get_start()
        self.start = np.zeros_like(self.baseline_start)

        self.goal_config = goal_config
        self.baseline_goal = self.room.get_target()
        self.goal = np.zeros_like(self.baseline_goal)

        # Time Limit
        self.curr_path_length = 0
        if max_path_length is None:
            self.max_path_length = self.MAX_PATH_LENGTH
        else:
            self.max_path_length = max_path_length

        # Reward Functions
        self.potential_type = potential_type
        self.shaped = shaped
        self.base_reward = base_reward

        # State Representations
        self.state_embedding = (state_embedding if state_embedding is not None
                                else IdentityEmbedding())
        self.goal_embedding = (goal_embedding if goal_embedding is not None
                               else IdentityEmbedding())

        # Action Space
        bounds = self.model.actuator_ctrlrange.copy()
        self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])

        # Observation Space
        example_state_obs = self._get_env_obs()
        example_obs = self.state_embedding.get_embedding(example_state_obs)
        state_obs_size = example_obs.shape[0]
        obs_size = example_obs.shape[0]
        self.obs_space = Box(-1 * np.inf * np.ones(obs_size),
                             np.inf * np.ones(obs_size))
        self.state_obs_space = Box(-1 * np.inf * np.ones(state_obs_size),
                                   np.inf * np.ones(state_obs_size))

        # Goal Space
        example_goal = self._get_env_achieved_goal(example_state_obs)
        state_goal_size = example_goal.shape[0]
        goal_size = self.goal_embedding.get_embedding(example_goal).shape[0]
        self.goal_space = Box(-1 * np.inf * np.ones(goal_size),
                              np.inf * np.ones(goal_size))
        self.state_goal_space = Box(-1 * np.inf * np.ones(state_goal_size),
                                    np.inf * np.ones(state_goal_size))

        # Final Setup
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_obs_space),
            ('state_desired_goal', self.state_goal_space),
            ('state_achieved_goal', self.state_goal_space),
        ])

        self.reset()
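
The state and goal embeddings in this last example only need a get_embedding method that maps an array to an array, as the observation- and goal-space construction shows. IdentityEmbedding's real definition is not included here; the following is an assumed minimal sketch of that interface.

# Assumed minimal interface for IdentityEmbedding, based on how it is called above.
import numpy as np

class IdentityEmbedding:
    def get_embedding(self, x):
        # Identity mapping: the embedded state/goal is the raw state/goal.
        return np.asarray(x)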