Code Example #1
    def __init__(self,
                 action_scale=1,
                 frame_skip=5,
                 obstacle_position=None,
                 obstacle_height=None):
        self.quick_init(locals())

        self.obstacle_position = obstacle_position
        self.obstacle_height = obstacle_height

        print(self.obstacle_position, self.obstacle_height)

        MultitaskEnv.__init__(self)
        self.action_scale = action_scale
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = Box(low=low, high=high)
        obs_size = self._get_env_obs().shape[0]
        high = np.inf * np.ones(obs_size)
        low = -high
        self.obs_space = Box(low, high)
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('state_observation', self.obs_space),
        ])

        # self.obstacle_position = obstacle_position
        # self.obstacle_height = obstacle_height

        self.reset()
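The constructors in this collection all follow the same pattern: call MultitaskEnv.__init__ and MujocoEnv.__init__, derive the Box action space from model.actuator_ctrlrange, and expose a Dict observation space. Below is a minimal usage sketch of interacting with such an environment; AntObstacleEnv is a hypothetical class name standing in for whichever class owns the __init__ in Code Example #1, and the classic gym 4-tuple step() API is assumed.

env = AntObstacleEnv(action_scale=1, frame_skip=5)   # hypothetical class name
obs = env.reset()
# reset()/step() return a dict keyed by the entries of observation_space
assert set(obs.keys()) >= {'observation', 'state_observation'}

action = env.action_space.sample()           # Box bounded by actuator_ctrlrange
obs, reward, done, info = env.step(action)   # assumes the classic gym 4-tuple API
print(obs['observation'].shape)              # flat vector from _get_env_obs()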
Code Example #2
    def __init__(self,
                 action_scale=1,
                 frame_skip=5,
                 force=None,
                 timestep_start=100000,
                 timestep_end=100000):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        self.action_scale = action_scale
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = Box(low=low, high=high)
        obs_size = self._get_env_obs().shape[0]
        high = np.inf * np.ones(obs_size)
        low = -high
        self.obs_space = Box(low, high)
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('state_observation', self.obs_space),
        ])

        self.force = force
        self.timestep_start = timestep_start
        self.timestep_end = timestep_end
        self.step_count = 0

        self.reset()
Code Example #3
 def __init__(self, action_scale=1, frame_skip=5, reward_type='vel_distance',
              indicator_threshold=.1, fixed_goal=5, fix_goal=False,
              max_speed=6):
     self.quick_init(locals())
     MultitaskEnv.__init__(self)
     self.action_scale = action_scale
     MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
     bounds = self.model.actuator_ctrlrange.copy()
     low = bounds[:, 0]
     high = bounds[:, 1]
     self.action_space = Box(low=low, high=high)
     self.reward_type = reward_type
     self.indicator_threshold = indicator_threshold
     self.fixed_goal = fixed_goal
     self.fix_goal = fix_goal
     self._state_goal = None
     self.goal_space = Box(np.array(-1*max_speed), np.array(max_speed))
     obs_size = self._get_env_obs().shape[0]
     high = np.inf * np.ones(obs_size)
     low = -high
     self.obs_space = Box(low, high)
     self.achieved_goal_space = Box(self.obs_space.low[8], self.obs_space.high[8])
     self.observation_space = Dict([
         ('observation', self.obs_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.achieved_goal_space),
         ('state_observation', self.obs_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.achieved_goal_space),
     ])
     self.reset()
Code Example #4
 def __init__(self,
              frame_skip=30,
              action_scale=10,
              keep_vel_in_obs=True,
              use_safety_box=False,
              fix_goal=False,
              fixed_goal=(0.05, 0.6, 0.15),
              reward_type='hand_distance',
              indicator_threshold=.05,
              goal_low=None,
              goal_high=None,
              ):
     self.quick_init(locals())
     MultitaskEnv.__init__(self)
     self.action_scale = action_scale
     MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
     bounds = self.model.actuator_ctrlrange.copy()
     low = bounds[:, 0]
     high = bounds[:, 1]
     self.action_space = Box(low=low, high=high)
     if goal_low is None:
         goal_low = np.array([-0.1, 0.5, 0.02])
     else:
         goal_low = np.array(goal_low)
     if goal_high is None:
         goal_high = np.array([0.1, 0.7, 0.2])
     else:
         goal_high = np.array(goal_high)
     self.safety_box = Box(
         goal_low,
         goal_high
     )
     self.keep_vel_in_obs = keep_vel_in_obs
     self.goal_space = Box(goal_low, goal_high)
     obs_size = self._get_env_obs().shape[0]
     high = np.inf * np.ones(obs_size)
     low = -high
     self.obs_space = Box(low, high)
     self.achieved_goal_space = Box(
         -np.inf * np.ones(3),
         np.inf * np.ones(3)
     )
     self.observation_space = Dict([
         ('observation', self.obs_space),
         ('desired_goal', self.goal_space),
         ('achieved_goal', self.achieved_goal_space),
         ('state_observation', self.obs_space),
         ('state_desired_goal', self.goal_space),
         ('state_achieved_goal', self.achieved_goal_space),
     ])
     self.fix_goal = fix_goal
     self.fixed_goal = np.array(fixed_goal)
     self.use_safety_box = use_safety_box
     self.prev_qpos = self.init_angles.copy()
     self.reward_type = reward_type
     self.indicator_threshold = indicator_threshold
     goal = self.sample_goal()
     self._state_goal = goal['state_desired_goal']
     self.reset()
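Code Example #4 calls self.sample_goal() and reads goal['state_desired_goal'], but the method itself (inherited from MultitaskEnv) is not shown. The sketch below is only an assumption about what it could look like, written so that the lookup above works: a uniform sample from self.goal_space, or the fixed goal, returned under both goal keys.

import numpy as np

def sample_goal(self):  # sketch of a MultitaskEnv-style method; not the original code
    if self.fix_goal:
        goal = self.fixed_goal.copy()
    else:
        goal = np.random.uniform(self.goal_space.low, self.goal_space.high)
    return {
        'desired_goal': goal,
        'state_desired_goal': goal,
    }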
Code Example #5
    def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
    ):

        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.goal_space = Box(
            np.array(goal_low),
            np.array(goal_high),
        )
        self._state_goal = None

        self.action_space = Box(np.array([-1, -1, -1, -1]),
                                np.array([1, 1, 1, 1]))
        max_angle = 1.5708
        self.state_space = Box(
            np.array([-1, -1, -1, -max_angle]),
            np.array([1, 1, 1, max_angle]),
        )
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.state_space),
            ('achieved_goal', self.state_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.state_space),
            ('state_achieved_goal', self.state_space),
        ])
        self._pos_action_scale = pos_action_scale
        self.target_pos_scale = target_pos_scale
        self.action_reward_scale = action_reward_scale
        self.target_angle_scale = target_angle_scale

        self.max_x_pos = max_x_pos
        self.max_y_pos = max_y_pos
        self.min_y_pos = .5

        self.reset()
        self.reset_mocap_welds()
Code Example #6
File: base.py Project: stevenlin1111/multiworld
 def __init__(self, model_name, frame_skip=50):
     MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
     # Resets the mocap welds that we use for actuation.
     sim = self.sim
     if sim.model.nmocap > 0 and sim.model.eq_data is not None:
         for i in range(sim.model.eq_data.shape[0]):
             if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                 # Define the xyz + quat of the mocap relative to the hand
                 sim.model.eq_data[i, :] = np.array(
                     [0., 0., 0., 1., 0., 0., 0.])
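Several of the other examples call self.reset_mocap_welds() without showing it. It is presumably the same weld-reset loop that Code Example #6 inlines in its constructor; the helper below is that loop factored into a method (a sketch derived from Example #6, not copied from the original source).

import mujoco_py
import numpy as np

def reset_mocap_welds(self):
    """Reset the mocap welds used for actuation to a zero offset and identity quaternion."""
    sim = self.sim
    if sim.model.nmocap > 0 and sim.model.eq_data is not None:
        for i in range(sim.model.eq_data.shape[0]):
            if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                # xyz offset (0, 0, 0) followed by unit quaternion (1, 0, 0, 0)
                sim.model.eq_data[i, :] = np.array([0., 0., 0., 1., 0., 0., 0.])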
Code Example #7
    def __init__(
        self,
        frame_skip=50,
        goal_low=-.5,
        goal_high=.5,
        pos_action_scale=1 / 100,
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=.25,
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold

        self.fix_goal = fix_goal
        self.fixed_goal = np.array([fixed_goal])
        self.goal_space = Box(np.array([goal_low]), np.array([goal_high]))
        self._state_goal = None

        self.action_space = Box(np.array([-1, -1, -1, -1]),
                                np.array([1, 1, 1, 1]))
        max_angle = 1.5708
        self.state_space = Box(
            np.array([-1, -1, -1, -max_angle]),
            np.array([1, 1, 1, max_angle]),
        )
        self.angle_space = Box(np.array([-max_angle]), np.array([max_angle]))
        self.observation_space = Dict([
            ('observation', self.state_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.angle_space),
            ('state_observation', self.state_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.angle_space),
        ])
        self._pos_action_scale = pos_action_scale
        self.action_reward_scale = action_reward_scale

        self.reset()
        self.reset_mocap_welds()
Code Example #8
File: ant.py Project: szk9876/multiworld
 def __init__(self, action_scale=1, frame_skip=5):
     # self.init_serialization(locals())
     self.quick_init(locals())
     MultitaskEnv.__init__(self)
     self.action_scale = action_scale
     MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
     bounds = self.model.actuator_ctrlrange.copy()
     low = bounds[:, 0]
     high = bounds[:, 1]
     self.action_space = Box(low=low, high=high)
     obs_size = self._get_env_obs().shape[0]
     high = np.inf * np.ones(obs_size)
     low = -high
     self.obs_space = Box(low=low, high=high)
     self.observation_space = Dict([
         ('observation', self.obs_space),
         ('state_observation', self.obs_space),
     ])
     self.reset()
Code Example #9
    def __init__(self, tasks=[{'goalPos': [0.2, 0]}], mode_1d=False,
                 init_pos=[0, 0], goal_pos=[0.2, 0], mpl=200,
                 change_task_every_episode=False, *args, **kwargs):

        self.quick_init(locals())    
        
        model_name = get_asset_full_path('pointMass/point.xml')
        self.obj_init_pos = init_pos
       
        self.tasks = np.array(tasks)
        self.num_tasks = len(self.tasks)
        self.task = self.tasks[0]
        self.curr_path_length = 0
        self.max_path_length = mpl
        self.mode_1d = mode_1d
        self.change_task_every_episode = change_task_every_episode

        MujocoEnv.__init__(self, model_name, frame_skip=1, automatically_set_spaces=True)
        #Serializable.__init__(self, *args, **kwargs)

        self.info_logKeys = ['targetDist']
Code Example #10
    def __init__(
            self,
            reward_info=None,
            frame_skip=50,
            pos_action_scale=2. / 100,
            randomize_goals=True,
            hide_goal=False,
            init_block_low=(-0.05, 0.55),
            init_block_high=(0.05, 0.65),
            puck_goal_low=(-0.05, 0.55),
            puck_goal_high=(0.05, 0.65),
            hand_goal_low=(-0.05, 0.55),
            hand_goal_high=(0.05, 0.65),
            fixed_puck_goal=(0.05, 0.6),
            fixed_hand_goal=(-0.05, 0.6),
            mocap_low=(-0.1, 0.5, 0.0),
            mocap_high=(0.1, 0.7, 0.5),
            force_puck_in_goal_space=False,
    ):
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.hide_goal = hide_goal

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.force_puck_in_goal_space = force_puck_in_goal_space

        self._goal_xyxy = self.sample_goal_xyxy()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )
        self.obs_box = Box(
            np.array([-0.15, 0.5, -0.15, 0.5]),
            np.array([0.15, 0.7, 0.15, 0.7]),
        )
        goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low, self.puck_goal_low))
        goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.reset()
        self.reset_mocap_welds()
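Code Example #10 calls self.sample_goal_xyxy() before the spaces are even built, but the method is not shown. The sketch below is a guess consistent with the attributes set above: a concatenated (hand_xy, puck_xy) goal that is either sampled uniformly from the configured ranges or taken from the fixed goals.

import numpy as np

def sample_goal_xyxy(self):  # assumed implementation; the original may differ
    if self.randomize_goals:
        hand = np.random.uniform(self.hand_goal_low, self.hand_goal_high)
        puck = np.random.uniform(self.puck_goal_low, self.puck_goal_high)
    else:
        hand = self.fixed_hand_goal.copy()
        puck = self.fixed_puck_goal.copy()
    return np.hstack((hand, puck))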
Code Example #11
 def __init__(self, model_name, frame_skip=50):
     MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
     self.reset_mocap_welds()
Code Example #12
    def __init__(
            self,
            reward_type='dense',
            count_bonus_coeff=None,
            n_bins=16,
            target_radius=0.5,  # Required distance to goal when using sparse reward
            norm_order=2,
            frame_skip=5,
            two_frames=False,
            vel_in_state=True,
            ant_low=list([-6, -6]),
            ant_high=list([6, 6]),
            goal_low=list([-6, -6]),
            goal_high=list([6, 6]),
            model_path='classic_mujoco/normal_gear_ratio_ant.xml',
            use_low_gear_ratio=False,
            goal_is_xy=False,
            goal_is_qpos=False,
            init_qpos=None,
            fixed_goal=None,
            diagnostics_goal=None,
            init_xy_mode='corner',
            terminate_when_unhealthy=False,
            healthy_z_range=(0.2, 0.9),
            # health_reward=10,
            done_penalty=0,
            goal_sampling_strategy='uniform',
            presampled_goal_paths='',
            *args,
            **kwargs):
        assert init_xy_mode in {
            'corner',
            'sample-uniformly-xy-space',
            'sample-from-goal-space',  # soon to be deprecated
        }
        assert not goal_is_xy or not goal_is_qpos
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        if use_low_gear_ratio:
            model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path(model_path),
                           frame_skip=frame_skip,
                           **kwargs)
        if goal_is_xy:
            assert reward_type.startswith('xy')

        if init_qpos is not None:
            self.init_qpos[:len(init_qpos)] = np.array(init_qpos)

        self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
        self.reward_type = reward_type
        self.count_bonus_coeff = count_bonus_coeff
        self.n_bins = n_bins
        self.bin_counts = np.ones((self.n_bins, self.n_bins))
        self.target_radius = target_radius
        self.norm_order = norm_order
        self.goal_is_xy = goal_is_xy
        self.goal_is_qpos = goal_is_qpos
        self.fixed_goal = fixed_goal
        self.diagnostics_goal = diagnostics_goal
        print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
        self.init_xy_mode = init_xy_mode
        self.terminate_when_unhealthy = terminate_when_unhealthy
        # self._healthy_reward = health_reward
        self._done_penalty = done_penalty
        self._healthy_z_range = healthy_z_range

        self.model_path = model_path
        assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
        self.goal_sampling_strategy = goal_sampling_strategy
        if self.goal_sampling_strategy == 'presampled':
            assert presampled_goal_paths is not None
            if not osp.exists(presampled_goal_paths):
                presampled_goal_paths = get_asset_full_path(
                    presampled_goal_paths)
            self.presampled_goals = np.load(presampled_goal_paths)
        else:
            self.presampled_goals = None

        self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
        goal_low, goal_high = np.array(goal_low), np.array(goal_high)
        self.two_frames = two_frames
        self.vel_in_state = vel_in_state
        if self.vel_in_state:
            obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
            obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
            if goal_is_xy:
                goal_space_low = goal_low
                goal_space_high = goal_high
            else:
                goal_space_low = np.concatenate((goal_low, -np.ones(27)))
                goal_space_high = np.concatenate((goal_high, np.ones(27)))
        else:
            obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
            obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
            if goal_is_xy:
                goal_space_low = goal_low
                goal_space_high = goal_high
            else:
                goal_space_low = np.concatenate((goal_low, -np.ones(13)))
                goal_space_high = np.concatenate((goal_high, np.ones(13)))

        if self.two_frames:
            self.obs_space = Box(np.concatenate(
                (obs_space_low, obs_space_low)),
                                 np.concatenate(
                                     (obs_space_high, obs_space_high)),
                                 dtype=np.float32)
            self.goal_space = Box(np.concatenate(
                (goal_space_low, goal_space_low)),
                                  np.concatenate(
                                      (goal_space_high, goal_space_high)),
                                  dtype=np.float32)
        else:
            self.obs_space = Box(obs_space_low,
                                 obs_space_high,
                                 dtype=np.float32)
            self.goal_space = Box(goal_space_low,
                                  goal_space_high,
                                  dtype=np.float32)
        qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))

        spaces = [
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.goal_space),
        ]
        if self.goal_is_xy:
            self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
            spaces += [
                ('xy_observation', self.xy_obs_space),
                ('xy_desired_goal', self.xy_obs_space),
                ('xy_achieved_goal', self.xy_obs_space),
            ]
        if self.goal_is_qpos:
            spaces += [
                ('qpos_desired_goal', qpos_space),
                ('qpos_achieved_goal', qpos_space),
            ]

        self.observation_space = Dict(spaces)

        self._full_state_goal = None
        self._xy_goal = None
        self._qpos_goal = None
        self._prev_obs = None
        self._cur_obs = None
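Code Example #12 stores count_bonus_coeff and an n_bins x n_bins bin_counts table but never shows how they are used. The snippet below is only a guess at the intended mechanism: a generic count-based exploration bonus over the ant's xy position, decaying with visitation count. It is not taken from the original source.

import numpy as np

def xy_count_bonus(self, xy):  # hypothetical helper illustrating the idea
    # Map xy into a bin over the [ant_low, ant_high] square.
    frac = (xy - self.ant_low) / (self.ant_high - self.ant_low)
    i, j = np.clip((frac * self.n_bins).astype(int), 0, self.n_bins - 1)
    self.bin_counts[i, j] += 1
    # Bonus shrinks as a bin is visited more often.
    return self.count_bonus_coeff / np.sqrt(self.bin_counts[i, j])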
Code Example #13
File: sawyer_push_nips.py Project: wuyx/LeapPaper
    def __init__(
        self,
        frame_skip=50,
        action_scale=2. / 100,
        hand_low=(-0.2, 0.5),
        hand_high=(0.2, 0.7),
        puck_low=(-0.2, 0.5),
        puck_high=(0.2, 0.7),
        fix_goal=False,
        sample_realistic_goals=False,
        fixed_goal=(-0.05, 0.6, 0.05, 0.6),
        goal_low=None,
        goal_high=None,
        fix_reset=False,
        fixed_reset=(0, 0.55, 0.0, 0.65),
        reset_low=None,
        reset_high=None,
        hand_z_position=0.06,
        puck_z_position=0.02,
        reward_type='state_distance',
        norm_order=2,
        indicator_threshold=0.06,
        num_mocap_calls_for_reset=250,
        square_puck=False,
        heavy_puck=False,
        invisible_boundary_wall=False,
        indicator_threshold_2=0.08,
        indicator_threshold_3=0.12,
        test_mode_case_num=None,
    ):
        self.quick_init(locals())

        self.square_puck = square_puck
        self.heavy_puck = heavy_puck
        self.invisible_boundary_wall = invisible_boundary_wall

        if self.invisible_boundary_wall:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_wall.xml')
        elif self.square_puck and self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_square_heavy.xml')
        elif self.square_puck and not self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_square.xml')
        elif not self.square_puck and self.heavy_puck:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips_heavy.xml')
        else:
            model_name = get_asset_full_path(
                'sawyer_xyz/sawyer_push_and_reach_nips.xml')

        MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)

        hand_low = np.array(hand_low)
        hand_high = np.array(hand_high)
        mocap_low = hand_low
        mocap_high = hand_high
        self.mocap_low = np.hstack((mocap_low, np.array([0.0])))
        self.mocap_high = np.hstack((mocap_high, np.array([0.5])))
        puck_low = np.array(puck_low)
        puck_high = np.array(puck_high)

        if self.square_puck:
            self.puck_radius = np.sqrt(2) * 0.04
        else:
            self.puck_radius = 0.04

        self.ee_radius = 0.015

        # puck_low += (self.puck_radius + self.ee_radius)
        # puck_high -= (self.puck_radius + self.ee_radius)
        # print(hand_low)
        # print(hand_high)
        # print(puck_low)
        # print(puck_high)

        self.obs_space = Box(np.hstack((hand_low, puck_low)),
                             np.hstack((hand_high, puck_high)),
                             dtype=np.float32)
        self.hand_space = Box(hand_low, hand_high, dtype=np.float32)
        self.puck_space = Box(puck_low, puck_high, dtype=np.float32)
        if goal_low is None:
            goal_low = self.obs_space.low.copy()
        if goal_high is None:
            goal_high = self.obs_space.high.copy()
        if reset_low is None:
            reset_low = self.obs_space.low.copy()
        if reset_high is None:
            reset_high = self.obs_space.high.copy()
        goal_low = np.array(goal_low)
        goal_high = np.array(goal_high)
        reset_low = np.array(reset_low)
        reset_high = np.array(reset_high)
        self.goal_space = Box(goal_low, goal_high, dtype=np.float32)
        self.reset_space = Box(reset_low, reset_high, dtype=np.float32)
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal',
             Box(goal_low[:2], goal_high[:2], dtype=np.float32)),
            ('proprio_achieved_goal', self.hand_space),
        ])

        self.num_mocap_calls_for_reset = num_mocap_calls_for_reset

        self.fix_reset = fix_reset
        self.sample_realistic_goals = sample_realistic_goals
        self.fixed_reset = np.array(fixed_reset)
        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None

        self.test_mode_case_num = test_mode_case_num
        assert self.test_mode_case_num in [None, 1, 2, 3, 4, 5]
        if self.test_mode_case_num is not None:
            self.fix_reset = True
            self.fix_goal = True
        if self.test_mode_case_num == 1:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([-0.20, 0.50, 0.20, 0.70])
        elif self.test_mode_case_num == 2:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.15, 0.65, 0.20, 0.70])
        elif self.test_mode_case_num == 3:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.70, -0.20, 0.70])
        elif self.test_mode_case_num == 4:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, -0.20, 0.70])
        elif self.test_mode_case_num == 5:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, 0.20, 0.70])

        self.reward_type = reward_type
        self.norm_order = norm_order
        self.indicator_threshold = indicator_threshold
        self.indicator_threshold_2 = indicator_threshold_2
        self.indicator_threshold_3 = indicator_threshold_3

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self._action_scale = action_scale
        self.hand_z_position = hand_z_position
        self.puck_z_position = puck_z_position
        self.reset_counter = 0
        self.reset()
Code Example #14
File: room_env.py Project: scottemmons/gcsl
    def __init__(self,
                 # Room
                 room,
                 # Start and Goal
                 start_config="all",
                 goal_config="all",
                 # Reward
                 potential_type='',
                 shaped=False,
                 base_reward='',
                 # State and Goal Representations
                 use_state_images=False,
                 use_goal_images=False,
                 image_resolution=64,
                 # Time Limits
                 max_path_length=None,
                 *args, **kwargs):

        # Initialize
        Serializable.quick_init(self, locals())
        MultitaskEnv.__init__(self)
        
        
        # Environment Configuration
        self._room = room
        model = self.room.get_mjcmodel()
        self.possible_positions = self.room.XY(n=50)

        with model.asfile() as f:
            MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)


        # Initialization 
        self.start_config = start_config
        self.baseline_start = self.room.get_start()
        self.start = np.zeros_like(self.baseline_start)

        self.goal_config = goal_config
        self.baseline_goal = self.room.get_target()
        self.goal = np.zeros_like(self.baseline_goal)

        # Time Limit
        self.curr_path_length = 0
        if max_path_length is None:
            self.max_path_length = self.MAX_PATH_LENGTH
        else:
            self.max_path_length = max_path_length

        # Reward Functions
        self.potential_type = potential_type
        self.shaped = shaped
        self.base_reward = base_reward

        # Action Space
        bounds = self.model.actuator_ctrlrange.copy()
        self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])

        self.use_state_images = use_state_images
        self.use_goal_images = use_goal_images
        self.image_resolution = image_resolution

        # Observation Space
        example_state_obs = self._get_env_obs()
        if self.use_state_images:
            example_obs = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
        else:
            example_obs = example_state_obs
        
        state_obs_shape = example_obs.shape
        obs_shape = example_obs.shape
        self.obs_space = Box(-1 * np.ones(obs_shape),np.ones(obs_shape))
        self.state_obs_space = Box(-1 * np.ones(state_obs_shape), np.ones(state_obs_shape))


        # Goal Space
        
        
        example_state_goal = self._get_env_achieved_goal(example_state_obs)
        if self.use_goal_images:
            example_goal = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
        else:
            example_goal = example_state_goal

        state_goal_shape = example_state_goal.shape
        goal_shape = example_goal.shape
        self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
        self.state_goal_space =  Box(-1 * np.ones(state_goal_shape), np.ones(state_goal_shape))

        # Final Setup
        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.goal_space),
            ('state_observation', self.state_obs_space),
            ('state_desired_goal', self.state_goal_space),
            ('state_achieved_goal', self.state_goal_space),
        ])

        self.reset()
Code Example #15
    def __init__(
        self,
        reward_info=None,
        frame_skip=50,
        pos_action_scale=4. / 100,
        randomize_goals=True,
        puck_goal_low=(-0.1, 0.5),
        puck_goal_high=(0.1, 0.7),
        hand_goal_low=(-0.1, 0.5),
        hand_goal_high=(0.1, 0.7),
        mocap_low=(-0.1, 0.5, 0.0),
        mocap_high=(0.1, 0.7, 0.5),
        # unused
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        fixed_puck_goal=(0.05, 0.6),
        fixed_hand_goal=(-0.05, 0.6),
        # multi-object
        num_objects=3,
        fixed_colors=True,
        seed=1,
        filename='sawyer_multiobj.xml',
        object_mass=1,
        # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
        object_meshes=None,
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.035,
        finger_sensors=False,
        maxlen=0.07,
        minlen=0.01,
        preload_obj_dict=None,
        reset_to_initial_position=True,
        object_low=(-np.inf, -np.inf, -np.inf),
        object_high=(np.inf, np.inf, np.inf),
        action_repeat=1,
        fixed_start=False,
        fixed_start_pos=(0, 0.6),
        goal_moves_one_object=True,
        num_scene_objects=[
            0, 1, 2
        ],  # list of number of objects that can appear per scene
        object_height=0.02,
        use_textures=False,
        init_camera=None,
        sliding_joints=False,
    ):
        if seed:
            np.random.seed(seed)
            self.env_seed = seed
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.reset_to_initial_position = reset_to_initial_position

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.object_low = np.array(object_low)
        self.object_high = np.array(object_high)
        self.action_repeat = action_repeat
        self.fixed_colors = fixed_colors
        self.goal_moves_one_object = goal_moves_one_object

        self.num_objects = num_objects
        self.num_scene_objects = num_scene_objects
        self.object_height = object_height
        self.fixed_start = fixed_start
        self.fixed_start_pos = np.array(fixed_start_pos)
        self.maxlen = maxlen
        self.use_textures = use_textures
        self.sliding_joints = sliding_joints
        self.cur_objects = np.array([0, 1, 2])
        self.preload_obj_dict = preload_obj_dict

        self.num_cur_objects = 0
        # Generate XML
        base_filename = asset_base_path + filename
        friction_params = (0.1, 0.1, 0.02)
        self.obj_stat_prop = create_object_xml(
            base_filename, num_objects, object_mass, friction_params,
            object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
            obj_classname, block_height, block_width, cylinder_radius,
            use_textures, sliding_joints)
        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        clean_xml(gen_xml)

        if self.use_textures:
            self.modder = TextureModder(self.sim)

        self.state_goal = self.sample_goal_for_rollout()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)

        self.action_space = Box(
            np.array([-1, -1]),
            np.array([1, 1]),
        )

        self.num_objects = num_objects
        low = (self.num_scene_objects[0] + 1) * [-0.2, 0.5]
        high = (self.num_scene_objects[0] + 1) * [0.2, 0.7]
        self.obs_box = Box(
            np.array(low),
            np.array(high),
        )
        goal_low = np.array(low)  # np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.array(high)  # np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        self.total_objects = self.num_objects + 1
        self.objects_box = Box(
            np.zeros((self.total_objects, )),
            np.ones((self.total_objects, )),
        )
        self.observation_space = Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
            ('objects', self.objects_box),
        ])
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.set_initial_object_positions()

        if use_textures:
            super().initialize_camera(init_camera)
            self.initialized_camera = init_camera

        self.reset()
        self.reset_mocap_welds()
Code Example #16
    def __init__(
            self,
            obj_low=None,
            obj_high=None,

            hand_low=(-0.1, 0.55, 0.05),
            hand_high=(0.05, 0.65, 0.2),

            reward_type='hand_and_obj_distance',
            indicator_threshold=0.06,
            frame_skip = 50,
            obj_init_positions=((0, 0.6, 0.02),),
            random_init=False,

            num_objects=1,
            fix_goal=False,

            mocap_low=(-0.13, 0.57, 0.04),
            mocap_high=(0.08, 0.73, 0.2),
            action_scale=0.02,

            fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
            goal_low=None,
            goal_high=None,
            reset_free=False,
            filename='sawyer_pick_and_place_multiobj.xml',
            object_mass=1,
            # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
            object_meshes=None,
            obj_classname = None,
            block_height=0.04,
            block_width = 0.04,
            cylinder_radius = 0.015,
            finger_sensors=False,
            maxlen=0.06,
            minlen=0.01,
            preload_obj_dict=None,
            hide_goal_markers=False,
            oracle_reset_prob=0.0,
            presampled_goals=None,
            num_goals_presampled=2,
            p_obj_in_hand=1,

            **kwargs
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)


        base_filename = asset_base_path + filename
        friction_params = (1, 1, 2)
        self.obj_stat_prop = create_object_xml(base_filename, num_objects, object_mass,
                                              friction_params, object_meshes, finger_sensors,
                                               maxlen, minlen, preload_obj_dict, obj_classname,
                                               block_height, block_width, cylinder_radius)

        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        self.reset_mocap_welds()

        clean_xml(gen_xml)

        self.mocap_low = mocap_low
        self.mocap_high = mocap_high
        self.action_scale = action_scale


        self.hand_low = np.array(hand_low)
        self.hand_high = np.array(hand_high)

        if obj_low is None:
            obj_low = self.hand_low
        if obj_high is None:
            obj_high = self.hand_high
        self.obj_low = obj_low
        self.obj_high = obj_high
        if goal_low is None:
            goal_low = np.hstack((self.hand_low, obj_low))
        if goal_high is None:
            goal_high = np.hstack((self.hand_high, obj_high))

        self.reward_type = reward_type
        self.random_init = random_init
        self.p_obj_in_hand = p_obj_in_hand
        self.indicator_threshold = indicator_threshold

        self.obj_init_z = obj_init_positions[0][2]
        self.obj_init_positions = np.array(obj_init_positions)
        self.last_obj_pos = self.obj_init_positions[0]

        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self._state_goal = None
        self.reset_free = reset_free
        self.oracle_reset_prob = oracle_reset_prob

        self.obj_ind_to_manip = 0

        self.hide_goal_markers = hide_goal_markers

        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
            dtype=np.float32
        )
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.hand_space = Box(
            self.hand_low,
            self.hand_high,
            dtype=np.float32
        )
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack(([0.0], self.hand_low, obj_low)),
            np.hstack(([0.04], self.hand_high, obj_high)),
            dtype=np.float32
        )
        self.num_objects = num_objects
        self.maxlen = maxlen

        self.observation_space = Dict([
            ('observation', self.gripper_and_hand_and_obj_space),
            ('desired_goal', self.hand_and_obj_space),
            ('achieved_goal', self.hand_and_obj_space),
            ('state_observation', self.gripper_and_hand_and_obj_space),
            ('state_desired_goal', self.hand_and_obj_space),
            ('state_achieved_goal', self.hand_and_obj_space),
            ('proprio_observation', self.hand_space),
            ('proprio_desired_goal', self.hand_space),
            ('proprio_achieved_goal', self.hand_space),
        ])
        self.hand_reset_pos = np.array([0, .6, .2])

        if presampled_goals is not None:
            self._presampled_goals = presampled_goals
            self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
        else:
            # presampled_goals will be created when sample_goal is first called
            self._presampled_goals = None
            self.num_goals_presampled = num_goals_presampled
        self.num_goals_presampled = 10
        self.picked_up_object = False
        self.train_pickups = 0
        self.eval_pickups = 0
        self.cur_mode = 'train'
        self.reset()
Code Example #17
    def __init__(
        self,
        puck_count: int = 2,
        reward_info=None,
        frame_skip: int = 50,
        pos_action_scale: float = 2. / 100,
        randomize_goals: bool = True,
        hide_goal: bool = False,
        init_block_low: Tuple[float, float] = (-0.05, 0.55),
        init_block_high: Tuple[float, float] = (0.05, 0.65),
        puck_goal_low: Tuple[float, float] = (-0.05, 0.55),
        puck_goal_high: Tuple[float, float] = (0.05, 0.65),
        hand_goal_low: Tuple[float, float] = (-0.05, 0.55),
        hand_goal_high: Tuple[float, float] = (0.05, 0.65),
        fixed_puck_goal: Tuple[float, float] = (0.05, 0.6),
        fixed_hand_goal: Tuple[float, float] = (-0.05, 0.6),
        mocap_low: Tuple[float, float, float] = (-0.1, 0.5, 0.0),
        mocap_high: Tuple[float, float, float] = (0.1, 0.7, 0.5),
        area_bound_low: Tuple[float, float] = (-0.2, 0.5),
        area_bound_high: Tuple[float, float] = (0.2, 0.7),
        force_puck_in_goal_space=False,
    ):
        self.quick_init(locals())
        self.puck_count = puck_count
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.hide_goal = hide_goal

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)

        self.puck_goal_low = np.array(puck_goal_low * puck_count)
        self.puck_goal_high = np.array(puck_goal_high * puck_count)
        self.fixed_puck_goal = np.array(fixed_puck_goal * puck_count)
        self.fixed_hand_goal = np.array(fixed_hand_goal * puck_count)

        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.force_puck_in_goal_space = force_puck_in_goal_space

        self._goal_xyxy = self.sample_goal_xyxy()

        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)

        self.action_space = spaces.Box(np.array([-1, -1]), np.array([1, 1]))
        self.obs_box = spaces.Box(
            np.array(area_bound_low * (puck_count + 1)),
            np.array(area_bound_high * (puck_count + 1)),
        )
        goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = spaces.Box(goal_low, goal_high)

        self.observation_space = spaces.Dict([
            ('observation', self.obs_box),
            ('state_observation', self.obs_box),
            ('desired_goal', self.goal_box),
            ('state_desired_goal', self.goal_box),
            ('achieved_goal', self.goal_box),
            ('state_achieved_goal', self.goal_box),
        ])

        self.endeff_id = self.model.body_name2id('leftclaw')
        self.hand_goal_id = self.model.body_name2id('hand-goal')
        self.puck_ids = [
            self.model.body_name2id('puck-{}'.format(i))
            for i in range(puck_count)
        ]
        self.puck_goal_ids = [
            self.model.body_name2id('puck-goal-{}'.format(i))
            for i in range(puck_count)
        ]

        self.hand_goal_qpos_index = self.model.get_joint_qpos_addr(
            'hand-goal-joint')[0]
        self.hand_goal_qvel_index = self.model.get_joint_qvel_addr(
            'hand-goal-joint')[0]
        self.puck_qpos_indices = [
            self.model.get_joint_qpos_addr('puckjoint-{}'.format(i))[0]
            for i in range(puck_count)
        ]
        self.puck_qvel_indices = [
            self.model.get_joint_qvel_addr('puckjoint-{}'.format(i))[0]
            for i in range(puck_count)
        ]
        self.puck_goal_qpos_indices = [
            self.model.get_joint_qpos_addr('puck-goal-joint-{}'.format(i))[0]
            for i in range(puck_count)
        ]
        self.puck_goal_qvel_indices = [
            self.model.get_joint_qvel_addr('puck-goal-joint-{}'.format(i))[0]
            for i in range(puck_count)
        ]

        self.reset()
        self.reset_mocap_welds()
Code Example #18
    def __init__(
        self,
        reward_info=None,
        frame_skip=2,
        pos_action_scale=2. / 100,
        randomize_goals=False,
        puck_goal_low=(-0.1, 0.5),
        puck_goal_high=(0.1, 0.7),
        hand_goal_low=(-0.1, 0.5),
        hand_goal_high=(0.1, 0.7),
        mocap_low=(-0.1, 0.5, 0.02),  #(-0.1, 0.5, 0)
        mocap_high=(0.1, 0.7, 0.5),
        # unused
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        fixed_puck_goal=(0., 0.7),
        fixed_hand_goal=(-0.05, 0.6),
        # multi-object
        num_objects=1,
        target_object_idx=0,
        filename='sawyer_multiobj.xml',
        object_mass=1,
        # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
        object_meshes=None,
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.05,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        reset_to_initial_position=True,
        object_low=(-0.1, 0.5, 0.0),
        object_high=(0.1, 0.7, 0.5),
        action_repeat=1,
        fixed_start=True,
        objjoint_xy=True,
    ):
        self.quick_init(locals())
        self.reward_info = reward_info
        self.randomize_goals = randomize_goals
        self._pos_action_scale = pos_action_scale
        self.reset_to_initial_position = reset_to_initial_position
        self.objjoint_xy = objjoint_xy

        self.init_block_low = np.array(init_block_low)
        self.init_block_high = np.array(init_block_high)
        self.puck_goal_low = np.array(puck_goal_low)
        self.puck_goal_high = np.array(puck_goal_high)
        self.hand_goal_low = np.array(hand_goal_low)
        self.hand_goal_high = np.array(hand_goal_high)
        self.fixed_puck_goal = np.array(fixed_puck_goal)
        self.fixed_hand_goal = np.array(fixed_hand_goal)
        self.mocap_low = np.array(mocap_low)
        self.mocap_high = np.array(mocap_high)
        self.object_low = np.array(object_low)
        self.object_high = np.array(object_high)
        self.action_repeat = action_repeat

        self.num_objects = num_objects
        self.target_object_idx = target_object_idx
        self.fixed_start = fixed_start
        self.maxlen = maxlen

        # Generate XML
        base_filename = asset_base_path + filename
        friction_params = (0.1, 0.1, 0.02)
        self.obj_stat_prop = create_object_xml(base_filename,
                                               num_objects,
                                               object_mass,
                                               friction_params,
                                               object_meshes,
                                               finger_sensors,
                                               maxlen,
                                               minlen,
                                               preload_obj_dict,
                                               obj_classname,
                                               block_height,
                                               block_width,
                                               cylinder_radius,
                                               objjoint_xy=objjoint_xy)
        gen_xml = create_root_xml(base_filename)
        MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
        clean_xml(gen_xml)

        self.state_goal = self.sample_goal_xyxy()
        # MultitaskEnv.__init__(self, distance_metric_order=2)
        # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)

        # self.action_space = Box(
        #     np.array([-1, -1]),
        #     np.array([1, 1]),
        # )

        # Add quaternion
        self.action_space = Box(
            np.array([-1, -1, 0, -1, -1, -1]),
            np.array([1, 1, 2 * np.pi, 1, 1, 1]),
        )

        self.num_objects = num_objects
        low = (self.num_objects + 1) * [-0.2, 0.5]
        high = (self.num_objects + 1) * [0.2, 0.7]
        self.observation_space = Box(
            np.array(low),
            np.array(high),
        )
        goal_low = np.array([-0.2, 0.5])  # np.concatenate((self.hand_goal_low, self.puck_goal_low))
        goal_high = np.array([0.2, 0.7])  # np.concatenate((self.hand_goal_high, self.puck_goal_high))
        self.goal_box = Box(
            goal_low,
            goal_high,
        )
        # hack for state-based experiments for other envs
        # self.observation_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )
        # self.goal_space = Box(
        #     np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]),
        #     np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]),
        # )

        self.set_initial_object_positions()

        self.reset()
        self.reset_mocap_welds()
Code Example #19
File: wheeled_car.py Project: wuyx/LeapPaper
    def __init__(
            self,
            reward_type='state_distance',
            norm_order=2,
            action_scale=20,
            frame_skip=3,
            two_frames=False,
            vel_in_state=True,
            z_in_state=True,
            car_low=(-1.90, -1.90),  #list([-1.60, -1.60]),
            car_high=(1.90, 1.90),  #list([1.60, 1.60]),
            goal_low=(-1.90, -1.90),  #list([-1.60, -1.60]),
            goal_high=(1.90, 1.90),  #list([1.60, 1.60]),
            model_path='wheeled_car.xml',
            reset_low=None,
            reset_high=None,
            *args,
            **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path('locomotion/' +
                                                          model_path),
                           frame_skip=frame_skip,
                           **kwargs)

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.action_scale = action_scale

        # set radius
        if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
            self.car_radius = np.sqrt(2) * 0.34
        elif model_path == 'wheeled_car_old.xml':
            self.car_radius = 0.30
        else:
            raise NotImplementedError

        # set walls
        if model_path == 'wheeled_car_u_wall.xml':
            self.walls = [
                Wall(0, -0.5, 0.75, 0.05, self.car_radius),
                Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
                Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
            ]
        else:
            self.walls = []

        self.reward_type = reward_type
        self.norm_order = norm_order

        self.car_low, self.car_high = np.array(car_low), np.array(car_high)
        self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)

        if reset_low is None:
            self.reset_low = np.array(car_low)
        else:
            self.reset_low = np.array(reset_low)
        if reset_high is None:
            self.reset_high = np.array(car_high)
        else:
            self.reset_high = np.array(reset_high)

        self.car_low += self.car_radius
        self.car_high -= self.car_radius
        self.goal_low += self.car_radius
        self.goal_high -= self.car_radius
        self.reset_low += self.car_radius
        self.reset_high -= self.car_radius

        self.two_frames = two_frames
        self.vel_in_state = vel_in_state
        self.z_in_state = z_in_state

        # x and y pos
        obs_space_low = np.copy(self.car_low)
        obs_space_high = np.copy(self.car_high)
        goal_space_low = np.copy(self.goal_low)
        goal_space_high = np.copy(self.goal_high)

        # z pos
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-1]))
            obs_space_high = np.concatenate((obs_space_high, [0.03]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        # sin and cos
        obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
        obs_space_high = np.concatenate((obs_space_high, [1, 1]))
        goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
        goal_space_high = np.concatenate((goal_space_high, [1, 1]))

        if self.vel_in_state:
            # x and y vel
            obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
            obs_space_high = np.concatenate((obs_space_high, [10, 10]))
            goal_space_low = np.concatenate((goal_space_low, [0, 0]))
            goal_space_high = np.concatenate((goal_space_high, [0, 0]))

            # z vel
            if self.z_in_state:
                obs_space_low = np.concatenate((obs_space_low, [-10]))
                obs_space_high = np.concatenate((obs_space_high, [10]))
                goal_space_low = np.concatenate((goal_space_low, [0]))
                goal_space_high = np.concatenate((goal_space_high, [0]))

            # theta vel
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low,
                              goal_space_high,
                              dtype=np.float32)
        if self.two_frames:
            self.obs_space = concatenate_box_spaces(self.obs_space,
                                                    self.obs_space)
            self.goal_space = concatenate_box_spaces(self.goal_space,
                                                     self.goal_space)

        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.obs_space),
            ('proprio_desired_goal', self.goal_space),
            ('proprio_achieved_goal', self.obs_space),
        ])

        self._state_goal = None
        self._cur_obs = None
        self._prev_obs = None
        self.reset()
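Code Example #19 uses concatenate_box_spaces for its two_frames option without showing it. The helper below is an assumption about what it does, inferred from how it is called: concatenate the low/high bounds of the given Box spaces into a single larger Box.

import numpy as np
from gym.spaces import Box

def concatenate_box_spaces(*spaces):  # assumed helper; the original may differ
    low = np.concatenate([space.low for space in spaces])
    high = np.concatenate([space.high for space in spaces])
    return Box(low, high, dtype=np.float32)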
Code Example #20
    def __init__(self,
                 reward_type='dense',
                 norm_order=2,
                 action_scale=0.15,
                 frame_skip=100,
                 ball_low=(-4.0, -4.0),
                 ball_high=(4.0, 4.0),
                 goal_low=(-4.0, -4.0),
                 goal_high=(4.0, 4.0),
                 model_path='pointmass_u_wall_big.xml',
                 reset_low=None,
                 reset_high=None,
                 v_func_heatmap_bounds=(-2.0, 0.0),
                 *args,
                 **kwargs):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        MujocoEnv.__init__(self,
                           model_path=get_asset_full_path('pointmass/' +
                                                          model_path),
                           frame_skip=frame_skip,
                           **kwargs)

        self.action_space = Box(np.array([-1, -1]),
                                np.array([1, 1]),
                                dtype=np.float32)
        self.action_scale = action_scale
        self.ball_radius = 0.50
        self.walls = [
            Wall(0, 1.0, 2.5, 0.5, self.ball_radius),
            Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius),
            Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius),
        ]

        self.reward_type = reward_type
        self.norm_order = norm_order

        self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high)
        self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)

        if reset_low is None:
            self.reset_low = np.array(ball_low)
        else:
            self.reset_low = np.array(reset_low)
        if reset_high is None:
            self.reset_high = np.array(ball_high)
        else:
            self.reset_high = np.array(reset_high)

        obs_space_low = np.copy(self.ball_low)
        obs_space_high = np.copy(self.ball_high)
        goal_space_low = np.copy(self.goal_low)
        goal_space_high = np.copy(self.goal_high)

        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low,
                              goal_space_high,
                              dtype=np.float32)

        self.observation_space = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.obs_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.obs_space),
            ('proprio_observation', self.obs_space),
            ('proprio_desired_goal', self.goal_space),
            ('proprio_achieved_goal', self.obs_space),
        ])

        self.v_func_heatmap_bounds = v_func_heatmap_bounds

        self._state_goal = None
        self.reset()