def __init__(self, action_scale=1, frame_skip=5, obstacle_position=None,
             obstacle_height=None):
    self.quick_init(locals())
    self.obstacle_position = obstacle_position
    self.obstacle_height = obstacle_height
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()

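# Hedged sketch (not part of the original source): the Dict observation
# space declared above implies an observation builder along these lines.
# `_get_env_obs` exists in the class; packaging the same array under both
# keys is the multiworld convention, but the real method may differ.
def _get_obs_sketch(self):
    state = self._get_env_obs()
    return dict(
        observation=state,
        state_observation=state,
    )
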
def __init__(self, action_scale=1, frame_skip=5, force=None,
             timestep_start=100000, timestep_end=100000):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    # Perturbation: an external force applied between these two step counts.
    self.force = force
    self.timestep_start = timestep_start
    self.timestep_end = timestep_end
    self.step_count = 0
    self.reset()

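# Hedged sketch (an assumption, not the source's method): one way the
# force/timestep window above is typically consumed. `body_index` is a
# placeholder; mujoco_py exposes applied Cartesian forces via
# sim.data.xfrc_applied, an (nbody, 6) array of force + torque.
def _apply_perturbation_sketch(self, body_index=1):
    if (self.force is not None
            and self.timestep_start <= self.step_count <= self.timestep_end):
        self.sim.data.xfrc_applied[body_index, :3] = self.force
    else:
        self.sim.data.xfrc_applied[body_index, :3] = 0
    self.step_count += 1
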
def __init__(self, action_scale=1, frame_skip=5, reward_type='vel_distance',
             indicator_threshold=.1, fixed_goal=5, fix_goal=False,
             max_speed=6):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fixed_goal = fixed_goal
    self.fix_goal = fix_goal
    self._state_goal = None
    self.goal_space = Box(np.array(-1 * max_speed), np.array(max_speed))
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    # The achieved goal is the single velocity component at observation index 8.
    self.achieved_goal_space = Box(self.obs_space.low[8],
                                   self.obs_space.high[8])
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.reset()

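# Hedged sketch: a 'vel_distance' style reward consistent with the spaces
# above, where the achieved goal is the scalar velocity at observation
# index 8. Illustrative only; the real reward computation lives elsewhere.
def _compute_reward_sketch(achieved_goal, desired_goal,
                           reward_type='vel_distance',
                           indicator_threshold=0.1):
    dist = np.abs(achieved_goal - desired_goal)
    if reward_type == 'vel_distance':
        return -dist  # dense
    # sparse variant: 0 inside the threshold, -1 outside
    return -(dist > indicator_threshold).astype(float)
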
def __init__(
        self,
        frame_skip=30,
        action_scale=10,
        keep_vel_in_obs=True,
        use_safety_box=False,
        fix_goal=False,
        fixed_goal=(0.05, 0.6, 0.15),
        reward_type='hand_distance',
        indicator_threshold=.05,
        goal_low=None,
        goal_high=None,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    if goal_low is None:
        goal_low = np.array([-0.1, 0.5, 0.02])
    else:
        goal_low = np.array(goal_low)
    if goal_high is None:
        goal_high = np.array([0.1, 0.7, 0.2])
    else:
        goal_high = np.array(goal_high)
    self.safety_box = Box(goal_low, goal_high)
    self.keep_vel_in_obs = keep_vel_in_obs
    self.goal_space = Box(goal_low, goal_high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(-np.inf * np.ones(3), np.inf * np.ones(3))
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.use_safety_box = use_safety_box
    self.prev_qpos = self.init_angles.copy()
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    goal = self.sample_goal()
    self._state_goal = goal['state_desired_goal']
    self.reset()

def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(
        np.array(goal_low),
        np.array(goal_high),
    )
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]),
                            np.array([1, 1, 1, 1]))
    max_angle = 1.5708  # pi / 2
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.state_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.state_space),
        ('state_achieved_goal', self.state_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.target_pos_scale = target_pos_scale
    self.action_reward_scale = action_reward_scale
    self.target_angle_scale = target_angle_scale
    self.max_x_pos = max_x_pos
    self.max_y_pos = max_y_pos
    self.min_y_pos = .5
    self.reset()
    self.reset_mocap_welds()

def __init__(self, model_name, frame_skip=50):
    MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
    # Reset the mocap welds that we use for actuation.
    sim = self.sim
    if sim.model.nmocap > 0 and sim.model.eq_data is not None:
        for i in range(sim.model.eq_data.shape[0]):
            if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                # Define the xyz + quat of the mocap relative to the hand.
                sim.model.eq_data[i, :] = np.array(
                    [0., 0., 0., 1., 0., 0., 0.])

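# Hedged sketch: with the weld offsets zeroed as above, the welded body
# tracks the mocap body, so Cartesian control reduces to moving the mocap
# target. The body name 'mocap' and the fixed quaternion are placeholders;
# set_mocap_pos/set_mocap_quat are standard mujoco_py calls.
def _move_mocap_sketch(sim, delta):
    new_pos = sim.data.mocap_pos[0] + np.asarray(delta)
    sim.data.set_mocap_pos('mocap', new_pos)
    sim.data.set_mocap_quat('mocap', np.array([1., 0., 1., 0.]))
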
def __init__(
        self,
        frame_skip=50,
        goal_low=-.5,
        goal_high=.5,
        pos_action_scale=1 / 100,
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=.25,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array([fixed_goal])
    self.goal_space = Box(np.array([goal_low]), np.array([goal_high]))
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]),
                            np.array([1, 1, 1, 1]))
    max_angle = 1.5708  # pi / 2
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.angle_space = Box(np.array([-max_angle]), np.array([max_angle]))
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.angle_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.angle_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.action_reward_scale = action_reward_scale
    self.reset()
    self.reset_mocap_welds()

def __init__(self, action_scale=1, frame_skip=5):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low=low, high=high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()

def __init__(self, tasks=[{'goalPos': [0.2, 0]}], mode_1d=False,
             init_pos=[0, 0], goal_pos=[0.2, 0], mpl=200,
             change_task_every_episode=False, *args, **kwargs):
    self.quick_init(locals())
    model_name = get_asset_full_path('pointMass/point.xml')
    self.obj_init_pos = init_pos
    self.tasks = np.array(tasks)
    self.num_tasks = len(self.tasks)
    self.task = self.tasks[0]
    self.curr_path_length = 0
    self.max_path_length = mpl
    self.mode_1d = mode_1d
    self.change_task_every_episode = change_task_every_episode
    MujocoEnv.__init__(self, model_name, frame_skip=1,
                       automatically_set_spaces=True)
    self.info_logKeys = ['targetDist']

def __init__(
        self,
        reward_info=None,
        frame_skip=50,
        pos_action_scale=2. / 100,
        randomize_goals=True,
        hide_goal=False,
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        puck_goal_low=(-0.05, 0.55),
        puck_goal_high=(0.05, 0.65),
        hand_goal_low=(-0.05, 0.55),
        hand_goal_high=(0.05, 0.65),
        fixed_puck_goal=(0.05, 0.6),
        fixed_hand_goal=(-0.05, 0.6),
        mocap_low=(-0.1, 0.5, 0.0),
        mocap_high=(0.1, 0.7, 0.5),
        force_puck_in_goal_space=False,
):
    self.quick_init(locals())
    self.reward_info = reward_info
    self.randomize_goals = randomize_goals
    self._pos_action_scale = pos_action_scale
    self.hide_goal = hide_goal
    self.init_block_low = np.array(init_block_low)
    self.init_block_high = np.array(init_block_high)
    self.puck_goal_low = np.array(puck_goal_low)
    self.puck_goal_high = np.array(puck_goal_high)
    self.hand_goal_low = np.array(hand_goal_low)
    self.hand_goal_high = np.array(hand_goal_high)
    self.fixed_puck_goal = np.array(fixed_puck_goal)
    self.fixed_hand_goal = np.array(fixed_hand_goal)
    self.mocap_low = np.array(mocap_low)
    self.mocap_high = np.array(mocap_high)
    self.force_puck_in_goal_space = force_puck_in_goal_space
    self._goal_xyxy = self.sample_goal_xyxy()
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.action_space = Box(
        np.array([-1, -1]),
        np.array([1, 1]),
    )
    self.obs_box = Box(
        np.array([-0.15, 0.5, -0.15, 0.5]),
        np.array([0.15, 0.7, 0.15, 0.7]),
    )
    goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low,
                               self.puck_goal_low))
    goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high,
                                self.puck_goal_high))
    self.goal_box = Box(
        goal_low,
        goal_high,
    )
    self.observation_space = Dict([
        ('observation', self.obs_box),
        ('state_observation', self.obs_box),
        ('desired_goal', self.goal_box),
        ('state_desired_goal', self.goal_box),
        ('achieved_goal', self.goal_box),
        ('state_achieved_goal', self.goal_box),
    ])
    self.reset()
    self.reset_mocap_welds()

def __init__(self, model_name, frame_skip=50):
    MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
    self.reset_mocap_welds()

def __init__(
        self,
        reward_type='dense',
        count_bonus_coeff=None,
        n_bins=16,
        target_radius=0.5,  # required distance to goal when using sparse reward
        norm_order=2,
        frame_skip=5,
        two_frames=False,
        vel_in_state=True,
        ant_low=(-6, -6),
        ant_high=(6, 6),
        goal_low=(-6, -6),
        goal_high=(6, 6),
        model_path='classic_mujoco/normal_gear_ratio_ant.xml',
        use_low_gear_ratio=False,
        goal_is_xy=False,
        goal_is_qpos=False,
        init_qpos=None,
        fixed_goal=None,
        diagnostics_goal=None,
        init_xy_mode='corner',
        terminate_when_unhealthy=False,
        healthy_z_range=(0.2, 0.9),
        done_penalty=0,
        goal_sampling_strategy='uniform',
        presampled_goal_paths='',
        *args,
        **kwargs):
    assert init_xy_mode in {
        'corner',
        'sample-uniformly-xy-space',
        'sample-from-goal-space',  # soon to be deprecated
    }
    assert not goal_is_xy or not goal_is_qpos
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    if use_low_gear_ratio:
        model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path(model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    if goal_is_xy:
        assert reward_type.startswith('xy')
    if init_qpos is not None:
        self.init_qpos[:len(init_qpos)] = np.array(init_qpos)
    self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
    self.reward_type = reward_type
    self.count_bonus_coeff = count_bonus_coeff
    self.n_bins = n_bins
    self.bin_counts = np.ones((self.n_bins, self.n_bins))
    self.target_radius = target_radius
    self.norm_order = norm_order
    self.goal_is_xy = goal_is_xy
    self.goal_is_qpos = goal_is_qpos
    self.fixed_goal = fixed_goal
    self.diagnostics_goal = diagnostics_goal
    print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
    self.init_xy_mode = init_xy_mode
    self.terminate_when_unhealthy = terminate_when_unhealthy
    self._done_penalty = done_penalty
    self._healthy_z_range = healthy_z_range
    self.model_path = model_path
    assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
    self.goal_sampling_strategy = goal_sampling_strategy
    if self.goal_sampling_strategy == 'presampled':
        assert presampled_goal_paths is not None
        if not osp.exists(presampled_goal_paths):
            presampled_goal_paths = get_asset_full_path(presampled_goal_paths)
        self.presampled_goals = np.load(presampled_goal_paths)
    else:
        self.presampled_goals = None
    self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
    goal_low, goal_high = np.array(goal_low), np.array(goal_high)
    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    if self.vel_in_state:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(27)))
            goal_space_high = np.concatenate((goal_high, np.ones(27)))
    else:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(13)))
            goal_space_high = np.concatenate((goal_high, np.ones(13)))
    if self.two_frames:
        self.obs_space = Box(
            np.concatenate((obs_space_low, obs_space_low)),
            np.concatenate((obs_space_high, obs_space_high)),
            dtype=np.float32)
        self.goal_space = Box(
            np.concatenate((goal_space_low, goal_space_low)),
            np.concatenate((goal_space_high, goal_space_high)),
            dtype=np.float32)
    else:
        self.obs_space = Box(obs_space_low, obs_space_high,
                             dtype=np.float32)
        self.goal_space = Box(goal_space_low, goal_space_high,
                              dtype=np.float32)
    qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))
    spaces = [
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ]
    if self.goal_is_xy:
        self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
        spaces += [
            ('xy_observation', self.xy_obs_space),
            ('xy_desired_goal', self.xy_obs_space),
            ('xy_achieved_goal', self.xy_obs_space),
        ]
    if self.goal_is_qpos:
        spaces += [
            ('qpos_desired_goal', qpos_space),
            ('qpos_achieved_goal', qpos_space),
        ]
    self.observation_space = Dict(spaces)
    self._full_state_goal = None
    self._xy_goal = None
    self._qpos_goal = None
    self._prev_obs = None
    self._cur_obs = None

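# Hedged sketch of the count-based bonus implied by n_bins, bin_counts and
# count_bonus_coeff above (assumes count_bonus_coeff is set): discretize the
# xy position into an n_bins x n_bins grid over [ant_low, ant_high] and pay
# coeff / sqrt(visit count). The exact formula in the real code may differ.
def _count_bonus_sketch(self, xy):
    frac = (xy - self.ant_low) / (self.ant_high - self.ant_low)
    ij = np.clip((frac * self.n_bins).astype(int), 0, self.n_bins - 1)
    self.bin_counts[ij[0], ij[1]] += 1
    return self.count_bonus_coeff / np.sqrt(self.bin_counts[ij[0], ij[1]])
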
def __init__(
        self,
        frame_skip=50,
        action_scale=2. / 100,
        hand_low=(-0.2, 0.5),
        hand_high=(0.2, 0.7),
        puck_low=(-0.2, 0.5),
        puck_high=(0.2, 0.7),
        fix_goal=False,
        sample_realistic_goals=False,
        fixed_goal=(-0.05, 0.6, 0.05, 0.6),
        goal_low=None,
        goal_high=None,
        fix_reset=False,
        fixed_reset=(0, 0.55, 0.0, 0.65),
        reset_low=None,
        reset_high=None,
        hand_z_position=0.06,
        puck_z_position=0.02,
        reward_type='state_distance',
        norm_order=2,
        indicator_threshold=0.06,
        num_mocap_calls_for_reset=250,
        square_puck=False,
        heavy_puck=False,
        invisible_boundary_wall=False,
        indicator_threshold_2=0.08,
        indicator_threshold_3=0.12,
        test_mode_case_num=None,
):
    self.quick_init(locals())
    self.square_puck = square_puck
    self.heavy_puck = heavy_puck
    self.invisible_boundary_wall = invisible_boundary_wall
    if self.invisible_boundary_wall:
        model_name = get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_nips_wall.xml')
    elif self.square_puck and self.heavy_puck:
        model_name = get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_nips_square_heavy.xml')
    elif self.square_puck and not self.heavy_puck:
        model_name = get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_nips_square.xml')
    elif not self.square_puck and self.heavy_puck:
        model_name = get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_nips_heavy.xml')
    else:
        model_name = get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_nips.xml')
    MujocoEnv.__init__(self, model_name, frame_skip=frame_skip)
    hand_low = np.array(hand_low)
    hand_high = np.array(hand_high)
    mocap_low = hand_low
    mocap_high = hand_high
    self.mocap_low = np.hstack((mocap_low, np.array([0.0])))
    self.mocap_high = np.hstack((mocap_high, np.array([0.5])))
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    if self.square_puck:
        self.puck_radius = np.sqrt(2) * 0.04
    else:
        self.puck_radius = 0.04
    self.ee_radius = 0.015
    self.obs_space = Box(np.hstack((hand_low, puck_low)),
                         np.hstack((hand_high, puck_high)),
                         dtype=np.float32)
    self.hand_space = Box(hand_low, hand_high, dtype=np.float32)
    self.puck_space = Box(puck_low, puck_high, dtype=np.float32)
    if goal_low is None:
        goal_low = self.obs_space.low.copy()
    if goal_high is None:
        goal_high = self.obs_space.high.copy()
    if reset_low is None:
        reset_low = self.obs_space.low.copy()
    if reset_high is None:
        reset_high = self.obs_space.high.copy()
    goal_low = np.array(goal_low)
    goal_high = np.array(goal_high)
    reset_low = np.array(reset_low)
    reset_high = np.array(reset_high)
    self.goal_space = Box(goal_low, goal_high, dtype=np.float32)
    self.reset_space = Box(reset_low, reset_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal',
         Box(goal_low[:2], goal_high[:2], dtype=np.float32)),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.num_mocap_calls_for_reset = num_mocap_calls_for_reset
    self.fix_reset = fix_reset
    self.sample_realistic_goals = sample_realistic_goals
    self.fixed_reset = np.array(fixed_reset)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.test_mode_case_num = test_mode_case_num
    assert self.test_mode_case_num in [None, 1, 2, 3, 4, 5]
    if self.test_mode_case_num is not None:
        self.fix_reset = True
        self.fix_goal = True
        if self.test_mode_case_num == 1:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([-0.20, 0.50, 0.20, 0.70])
        elif self.test_mode_case_num == 2:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.15, 0.65, 0.20, 0.70])
        elif self.test_mode_case_num == 3:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.70, -0.20, 0.70])
        elif self.test_mode_case_num == 4:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, -0.20, 0.70])
        elif self.test_mode_case_num == 5:
            self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55])
            self.fixed_goal = np.array([0.20, 0.50, 0.20, 0.70])
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.indicator_threshold_2 = indicator_threshold_2
    self.indicator_threshold_3 = indicator_threshold_3
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]),
                            dtype=np.float32)
    self._action_scale = action_scale
    self.hand_z_position = hand_z_position
    self.puck_z_position = puck_z_position
    self.reset_counter = 0
    self.reset()

def __init__(self,
             # Room
             room,
             # Start and goal
             start_config="all",
             goal_config="all",
             # Reward
             potential_type='',
             shaped=False,
             base_reward='',
             # State and goal representations
             use_state_images=False,
             use_goal_images=False,
             image_resolution=64,
             # Time limit
             max_path_length=None,
             *args,
             **kwargs):
    # Initialize
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(self)

    # Environment configuration
    self._room = room
    model = self.room.get_mjcmodel()
    self.possible_positions = self.room.XY(n=50)
    with model.asfile() as f:
        MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

    # Initialization
    self.start_config = start_config
    self.baseline_start = self.room.get_start()
    self.start = np.zeros_like(self.baseline_start)
    self.goal_config = goal_config
    self.baseline_goal = self.room.get_target()
    self.goal = np.zeros_like(self.baseline_goal)

    # Time limit
    self.curr_path_length = 0
    if max_path_length is None:
        self.max_path_length = self.MAX_PATH_LENGTH
    else:
        self.max_path_length = max_path_length

    # Reward functions
    self.potential_type = potential_type
    self.shaped = shaped
    self.base_reward = base_reward

    # Action space
    bounds = self.model.actuator_ctrlrange.copy()
    self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])
    self.use_state_images = use_state_images
    self.use_goal_images = use_goal_images
    self.image_resolution = image_resolution

    # Observation space
    example_state_obs = self._get_env_obs()
    if self.use_state_images:
        example_obs = self.get_image(self.image_resolution,
                                     self.image_resolution,
                                     camera_name='topview')
    else:
        example_obs = example_state_obs
    state_obs_shape = example_state_obs.shape
    obs_shape = example_obs.shape
    self.obs_space = Box(-1 * np.ones(obs_shape), np.ones(obs_shape))
    self.state_obs_space = Box(-1 * np.ones(state_obs_shape),
                               np.ones(state_obs_shape))

    # Goal space
    example_state_goal = self._get_env_achieved_goal(example_state_obs)
    if self.use_goal_images:
        example_goal = self.get_image(self.image_resolution,
                                      self.image_resolution,
                                      camera_name='topview')
    else:
        example_goal = example_state_goal
    state_goal_shape = example_state_goal.shape
    goal_shape = example_goal.shape
    self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
    self.state_goal_space = Box(-1 * np.ones(state_goal_shape),
                                np.ones(state_goal_shape))

    # Final setup
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_obs_space),
        ('state_desired_goal', self.state_goal_space),
        ('state_achieved_goal', self.state_goal_space),
    ])
    self.reset()

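# Hedged sketch: the image observation/goal Boxes above are bounded by
# [-1, 1], so raw uint8 renders have to be rescaled before being returned.
# This is one common convention, not necessarily the source's.
def _normalize_image_sketch(img):
    return img.astype(np.float32) / 127.5 - 1.0
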
def __init__(
        self,
        reward_info=None,
        frame_skip=50,
        pos_action_scale=4. / 100,
        randomize_goals=True,
        puck_goal_low=(-0.1, 0.5),
        puck_goal_high=(0.1, 0.7),
        hand_goal_low=(-0.1, 0.5),
        hand_goal_high=(0.1, 0.7),
        mocap_low=(-0.1, 0.5, 0.0),
        mocap_high=(0.1, 0.7, 0.5),
        # unused
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        fixed_puck_goal=(0.05, 0.6),
        fixed_hand_goal=(-0.05, 0.6),
        # multi-object
        num_objects=3,
        fixed_colors=True,
        seed=1,
        filename='sawyer_multiobj.xml',
        object_mass=1,
        # e.g. ['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl']
        object_meshes=None,
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.035,
        finger_sensors=False,
        maxlen=0.07,
        minlen=0.01,
        preload_obj_dict=None,
        reset_to_initial_position=True,
        object_low=(-np.inf, -np.inf, -np.inf),
        object_high=(np.inf, np.inf, np.inf),
        action_repeat=1,
        fixed_start=False,
        fixed_start_pos=(0, 0.6),
        goal_moves_one_object=True,
        # list of numbers of objects that can appear per scene
        num_scene_objects=[0, 1, 2],
        object_height=0.02,
        use_textures=False,
        init_camera=None,
        sliding_joints=False,
):
    if seed:
        np.random.seed(seed)
    self.env_seed = seed
    self.quick_init(locals())
    self.reward_info = reward_info
    self.randomize_goals = randomize_goals
    self._pos_action_scale = pos_action_scale
    self.reset_to_initial_position = reset_to_initial_position
    self.init_block_low = np.array(init_block_low)
    self.init_block_high = np.array(init_block_high)
    self.puck_goal_low = np.array(puck_goal_low)
    self.puck_goal_high = np.array(puck_goal_high)
    self.hand_goal_low = np.array(hand_goal_low)
    self.hand_goal_high = np.array(hand_goal_high)
    self.fixed_puck_goal = np.array(fixed_puck_goal)
    self.fixed_hand_goal = np.array(fixed_hand_goal)
    self.mocap_low = np.array(mocap_low)
    self.mocap_high = np.array(mocap_high)
    self.object_low = np.array(object_low)
    self.object_high = np.array(object_high)
    self.action_repeat = action_repeat
    self.fixed_colors = fixed_colors
    self.goal_moves_one_object = goal_moves_one_object
    self.num_objects = num_objects
    self.num_scene_objects = num_scene_objects
    self.object_height = object_height
    self.fixed_start = fixed_start
    self.fixed_start_pos = np.array(fixed_start_pos)
    self.maxlen = maxlen
    self.use_textures = use_textures
    self.sliding_joints = sliding_joints
    self.cur_objects = np.array([0, 1, 2])
    self.preload_obj_dict = preload_obj_dict
    self.num_cur_objects = 0

    # Generate XML
    base_filename = asset_base_path + filename
    friction_params = (0.1, 0.1, 0.02)
    self.obj_stat_prop = create_object_xml(
        base_filename, num_objects, object_mass, friction_params,
        object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
        obj_classname, block_height, block_width, cylinder_radius,
        use_textures, sliding_joints)
    gen_xml = create_root_xml(base_filename)
    MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
    clean_xml(gen_xml)

    if self.use_textures:
        self.modder = TextureModder(self.sim)
    self.state_goal = self.sample_goal_for_rollout()
    self.action_space = Box(
        np.array([-1, -1]),
        np.array([1, 1]),
    )
    low = (self.num_scene_objects[0] + 1) * [-0.2, 0.5]
    high = (self.num_scene_objects[0] + 1) * [0.2, 0.7]
    self.obs_box = Box(
        np.array(low),
        np.array(high),
    )
    goal_low = np.array(low)
    goal_high = np.array(high)
    self.goal_box = Box(
        goal_low,
        goal_high,
    )
    self.total_objects = self.num_objects + 1
    self.objects_box = Box(
        np.zeros((self.total_objects,)),
        np.ones((self.total_objects,)),
    )
    self.observation_space = Dict([
        ('observation', self.obs_box),
        ('state_observation', self.obs_box),
        ('desired_goal', self.goal_box),
        ('state_desired_goal', self.goal_box),
        ('achieved_goal', self.goal_box),
        ('state_achieved_goal', self.goal_box),
        ('objects', self.objects_box),
    ])
    self.set_initial_object_positions()
    if use_textures:
        super().initialize_camera(init_camera)
        self.initialized_camera = init_camera
    self.reset()
    self.reset_mocap_welds()

def __init__(
        self,
        obj_low=None,
        obj_high=None,
        hand_low=(-0.1, 0.55, 0.05),
        hand_high=(0.05, 0.65, 0.2),
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        frame_skip=50,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        num_objects=1,
        fix_goal=False,
        mocap_low=(-0.13, 0.57, 0.04),
        mocap_high=(0.08, 0.73, 0.2),
        action_scale=0.02,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        filename='sawyer_pick_and_place_multiobj.xml',
        object_mass=1,
        # e.g. ['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl']
        object_meshes=None,
        obj_classname=None,
        block_height=0.04,
        block_width=0.04,
        cylinder_radius=0.015,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=2,
        p_obj_in_hand=1,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    base_filename = asset_base_path + filename
    friction_params = (1, 1, 2)
    self.obj_stat_prop = create_object_xml(
        base_filename, num_objects, object_mass, friction_params,
        object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
        obj_classname, block_height, block_width, cylinder_radius)
    gen_xml = create_root_xml(base_filename)
    MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
    self.reset_mocap_welds()
    clean_xml(gen_xml)
    self.mocap_low = mocap_low
    self.mocap_high = mocap_high
    self.action_scale = action_scale
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.obj_ind_to_manip = 0
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.num_objects = num_objects
    self.maxlen = maxlen
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        self.num_goals_presampled = len(
            list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()

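# Hedged sketch of the lazy presampling described in the comment above:
# fill the goal cache on first use, then serve random rows from it.
# `_generate_goals` is a hypothetical helper standing in for whatever
# actually produces the dict of goal arrays in the real class.
def _sample_goal_sketch(self):
    import random
    if self._presampled_goals is None:
        self._presampled_goals = self._generate_goals(
            self.num_goals_presampled)
    idx = random.randrange(self.num_goals_presampled)
    return {key: arr[idx] for key, arr in self._presampled_goals.items()}
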
def __init__(
        self,
        puck_count: int = 2,
        reward_info=None,
        frame_skip: int = 50,
        pos_action_scale: float = 2. / 100,
        randomize_goals: bool = True,
        hide_goal: bool = False,
        init_block_low: Tuple[float, float] = (-0.05, 0.55),
        init_block_high: Tuple[float, float] = (0.05, 0.65),
        puck_goal_low: Tuple[float, float] = (-0.05, 0.55),
        puck_goal_high: Tuple[float, float] = (0.05, 0.65),
        hand_goal_low: Tuple[float, float] = (-0.05, 0.55),
        hand_goal_high: Tuple[float, float] = (0.05, 0.65),
        fixed_puck_goal: Tuple[float, float] = (0.05, 0.6),
        fixed_hand_goal: Tuple[float, float] = (-0.05, 0.6),
        mocap_low: Tuple[float, float, float] = (-0.1, 0.5, 0.0),
        mocap_high: Tuple[float, float, float] = (0.1, 0.7, 0.5),
        area_bound_low: Tuple[float, float] = (-0.2, 0.5),
        area_bound_high: Tuple[float, float] = (0.2, 0.7),
        force_puck_in_goal_space=False,
):
    self.quick_init(locals())
    self.puck_count = puck_count
    self.reward_info = reward_info
    self.randomize_goals = randomize_goals
    self._pos_action_scale = pos_action_scale
    self.hide_goal = hide_goal
    self.init_block_low = np.array(init_block_low)
    self.init_block_high = np.array(init_block_high)
    self.hand_goal_low = np.array(hand_goal_low)
    self.hand_goal_high = np.array(hand_goal_high)
    # Per-puck bounds are the single-puck values tiled puck_count times.
    self.puck_goal_low = np.array(puck_goal_low * puck_count)
    self.puck_goal_high = np.array(puck_goal_high * puck_count)
    self.fixed_puck_goal = np.array(fixed_puck_goal * puck_count)
    self.fixed_hand_goal = np.array(fixed_hand_goal * puck_count)
    self.mocap_low = np.array(mocap_low)
    self.mocap_high = np.array(mocap_high)
    self.force_puck_in_goal_space = force_puck_in_goal_space
    self._goal_xyxy = self.sample_goal_xyxy()
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.action_space = spaces.Box(np.array([-1, -1]), np.array([1, 1]))
    self.obs_box = spaces.Box(
        np.array(area_bound_low * (puck_count + 1)),
        np.array(area_bound_high * (puck_count + 1)),
    )
    goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low))
    goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high))
    self.goal_box = spaces.Box(goal_low, goal_high)
    self.observation_space = spaces.Dict([
        ('observation', self.obs_box),
        ('state_observation', self.obs_box),
        ('desired_goal', self.goal_box),
        ('state_desired_goal', self.goal_box),
        ('achieved_goal', self.goal_box),
        ('state_achieved_goal', self.goal_box),
    ])
    self.endeff_id = self.model.body_name2id('leftclaw')
    self.hand_goal_id = self.model.body_name2id('hand-goal')
    self.puck_ids = [
        self.model.body_name2id('puck-{}'.format(i))
        for i in range(puck_count)
    ]
    self.puck_goal_ids = [
        self.model.body_name2id('puck-goal-{}'.format(i))
        for i in range(puck_count)
    ]
    self.hand_goal_qpos_index = self.model.get_joint_qpos_addr(
        'hand-goal-joint')[0]
    self.hand_goal_qvel_index = self.model.get_joint_qvel_addr(
        'hand-goal-joint')[0]
    self.puck_qpos_indices = [
        self.model.get_joint_qpos_addr('puckjoint-{}'.format(i))[0]
        for i in range(puck_count)
    ]
    self.puck_qvel_indices = [
        self.model.get_joint_qvel_addr('puckjoint-{}'.format(i))[0]
        for i in range(puck_count)
    ]
    self.puck_goal_qpos_indices = [
        self.model.get_joint_qpos_addr('puck-goal-joint-{}'.format(i))[0]
        for i in range(puck_count)
    ]
    self.puck_goal_qvel_indices = [
        self.model.get_joint_qvel_addr('puck-goal-joint-{}'.format(i))[0]
        for i in range(puck_count)
    ]
    self.reset()
    self.reset_mocap_welds()

def __init__(
        self,
        reward_info=None,
        frame_skip=2,
        pos_action_scale=2. / 100,
        randomize_goals=False,
        puck_goal_low=(-0.1, 0.5),
        puck_goal_high=(0.1, 0.7),
        hand_goal_low=(-0.1, 0.5),
        hand_goal_high=(0.1, 0.7),
        mocap_low=(-0.1, 0.5, 0.02),
        mocap_high=(0.1, 0.7, 0.5),
        # unused
        init_block_low=(-0.05, 0.55),
        init_block_high=(0.05, 0.65),
        fixed_puck_goal=(0., 0.7),
        fixed_hand_goal=(-0.05, 0.6),
        # multi-object
        num_objects=1,
        target_object_idx=0,
        filename='sawyer_multiobj.xml',
        object_mass=1,
        # e.g. ['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl']
        object_meshes=None,
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.05,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        reset_to_initial_position=True,
        object_low=(-0.1, 0.5, 0.0),
        object_high=(0.1, 0.7, 0.5),
        action_repeat=1,
        fixed_start=True,
        objjoint_xy=True,
):
    self.quick_init(locals())
    self.reward_info = reward_info
    self.randomize_goals = randomize_goals
    self._pos_action_scale = pos_action_scale
    self.reset_to_initial_position = reset_to_initial_position
    self.objjoint_xy = objjoint_xy
    self.init_block_low = np.array(init_block_low)
    self.init_block_high = np.array(init_block_high)
    self.puck_goal_low = np.array(puck_goal_low)
    self.puck_goal_high = np.array(puck_goal_high)
    self.hand_goal_low = np.array(hand_goal_low)
    self.hand_goal_high = np.array(hand_goal_high)
    self.fixed_puck_goal = np.array(fixed_puck_goal)
    self.fixed_hand_goal = np.array(fixed_hand_goal)
    self.mocap_low = np.array(mocap_low)
    self.mocap_high = np.array(mocap_high)
    self.object_low = np.array(object_low)
    self.object_high = np.array(object_high)
    self.action_repeat = action_repeat
    self.num_objects = num_objects
    self.target_object_idx = target_object_idx
    self.fixed_start = fixed_start
    self.maxlen = maxlen

    # Generate XML
    base_filename = asset_base_path + filename
    friction_params = (0.1, 0.1, 0.02)
    self.obj_stat_prop = create_object_xml(
        base_filename, num_objects, object_mass, friction_params,
        object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
        obj_classname, block_height, block_width, cylinder_radius,
        objjoint_xy=objjoint_xy)
    gen_xml = create_root_xml(base_filename)
    MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
    clean_xml(gen_xml)

    self.state_goal = self.sample_goal_xyxy()
    # Action: xy position delta, rotation angle, and quaternion components.
    self.action_space = Box(
        np.array([-1, -1, 0, -1, -1, -1]),
        np.array([1, 1, 2 * np.pi, 1, 1, 1]),
    )
    low = (self.num_objects + 1) * [-0.2, 0.5]
    high = (self.num_objects + 1) * [0.2, 0.7]
    self.observation_space = Box(
        np.array(low),
        np.array(high),
    )
    goal_low = np.array([-0.2, 0.5])
    goal_high = np.array([0.2, 0.7])
    self.goal_box = Box(
        goal_low,
        goal_high,
    )
    self.set_initial_object_positions()
    self.reset()
    self.reset_mocap_welds()

def __init__(self,
             reward_type='state_distance',
             norm_order=2,
             action_scale=20,
             frame_skip=3,
             two_frames=False,
             vel_in_state=True,
             z_in_state=True,
             car_low=(-1.90, -1.90),
             car_high=(1.90, 1.90),
             goal_low=(-1.90, -1.90),
             goal_high=(1.90, 1.90),
             model_path='wheeled_car.xml',
             reset_low=None,
             reset_high=None,
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('locomotion/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]),
                            dtype=np.float32)
    self.action_scale = action_scale

    # Set radius.
    if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
        self.car_radius = np.sqrt(2) * 0.34
    elif model_path == 'wheeled_car_old.xml':
        self.car_radius = 0.30
    else:
        raise NotImplementedError

    # Set walls.
    if model_path == 'wheeled_car_u_wall.xml':
        self.walls = [
            Wall(0, -0.5, 0.75, 0.05, self.car_radius),
            Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
            Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
        ]
    else:
        self.walls = []

    self.reward_type = reward_type
    self.norm_order = norm_order
    self.car_low, self.car_high = np.array(car_low), np.array(car_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(car_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(car_high)
    else:
        self.reset_high = np.array(reset_high)
    # Shrink all bounds by the car radius so sampled positions keep the
    # car body inside the arena.
    self.car_low += self.car_radius
    self.car_high -= self.car_radius
    self.goal_low += self.car_radius
    self.goal_high -= self.car_radius
    self.reset_low += self.car_radius
    self.reset_high -= self.car_radius

    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    self.z_in_state = z_in_state

    # x and y pos
    obs_space_low = np.copy(self.car_low)
    obs_space_high = np.copy(self.car_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)
    # z pos
    if self.z_in_state:
        obs_space_low = np.concatenate((obs_space_low, [-1]))
        obs_space_high = np.concatenate((obs_space_high, [0.03]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))
    # sin and cos of heading
    obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
    obs_space_high = np.concatenate((obs_space_high, [1, 1]))
    goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
    goal_space_high = np.concatenate((goal_space_high, [1, 1]))
    if self.vel_in_state:
        # x and y vel
        obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
        obs_space_high = np.concatenate((obs_space_high, [10, 10]))
        goal_space_low = np.concatenate((goal_space_low, [0, 0]))
        goal_space_high = np.concatenate((goal_space_high, [0, 0]))
        # z vel
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))
        # theta vel
        obs_space_low = np.concatenate((obs_space_low, [-10]))
        obs_space_high = np.concatenate((obs_space_high, [10]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    if self.two_frames:
        self.obs_space = concatenate_box_spaces(self.obs_space,
                                                self.obs_space)
        self.goal_space = concatenate_box_spaces(self.goal_space,
                                                 self.goal_space)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self._state_goal = None
    self._cur_obs = None
    self._prev_obs = None
    self.reset()

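# Hedged sketch of concatenate_box_spaces as used above (a minimal assumed
# version: stack the bounds of the two Box spaces end to end).
def _concatenate_box_spaces_sketch(a, b):
    return Box(np.concatenate((a.low, b.low)),
               np.concatenate((a.high, b.high)),
               dtype=np.float32)
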
def __init__(self,
             reward_type='dense',
             norm_order=2,
             action_scale=0.15,
             frame_skip=100,
             ball_low=(-4.0, -4.0),
             ball_high=(4.0, 4.0),
             goal_low=(-4.0, -4.0),
             goal_high=(4.0, 4.0),
             model_path='pointmass_u_wall_big.xml',
             reset_low=None,
             reset_high=None,
             v_func_heatmap_bounds=(-2.0, 0.0),
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('pointmass/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]),
                            dtype=np.float32)
    self.action_scale = action_scale
    self.ball_radius = 0.50
    # U-shaped wall, padded by the ball radius.
    self.walls = [
        Wall(0, 1.0, 2.5, 0.5, self.ball_radius),
        Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius),
        Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius),
    ]
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(ball_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(ball_high)
    else:
        self.reset_high = np.array(reset_high)
    obs_space_low = np.copy(self.ball_low)
    obs_space_high = np.copy(self.ball_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)
    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self.v_func_heatmap_bounds = v_func_heatmap_bounds
    self._state_goal = None
    self.reset()

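# Hedged sketch: how the ball_radius padding above typically enters wall
# collision checks, assuming the Wall arguments are (center_x, center_y,
# half_width, half_height, padding_radius). This standalone version takes
# the same quantities explicitly rather than reading Wall attributes.
def _hits_wall_sketch(wall_x, wall_y, half_w, half_h, radius, x, y):
    # A ball centered at (x, y) overlaps the wall if its center lies within
    # the wall's half-extents inflated by the ball radius.
    return (abs(x - wall_x) <= half_w + radius
            and abs(y - wall_y) <= half_h + radius)
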