def model_name(self):
    if self.puckAndRobotInvisible:
        return get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_puck_and_robot_invisible.xml')
    elif self.puckInvisible:
        return get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_puck_invisible.xml')
    else:
        return get_asset_full_path(
            'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle.xml')
def model_name(self):
    if self.hide_arm:
        print('hiding')
        return get_asset_full_path(
            'sawyer_xyz/sawyer_pick_and_place_hidden_arm.xml')
    print('not hiding')
    return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
def model_name(self):
    if self.obstacle_height == 'multiple':
        return get_asset_full_path(
            'classic_mujoco/walker_obstacles/walker_multiple_obstacles.xml')
    return get_asset_full_path(
        'classic_mujoco/walker_obstacles/walker_obstacle_position={}_height={}.xml'.format(
            self.obstacle_position, self.obstacle_height))
def model_name(self):
    if self.ballInvisible:
        return get_asset_full_path(
            'sawyer_xyz/sawyer_pick_and_place_ball_invisible.xml')
    elif self.ballAndRobotInvisible:
        return get_asset_full_path(
            'sawyer_xyz/sawyer_pick_and_place_ball_and_robot_invisible.xml')
    else:
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
def model_name(self):
    # Remember to set the right limits in the base file
    # (the right line needs to be commented out).
    if self.hand_type == 'parallel_v1':
        self.reset_mocap_quat = [1, 0, 1, 0]
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
    ############################# WSG GRIPPER #############################
    elif self.hand_type == 'weiss_v1':
        self.reset_mocap_quat = zangle_to_quat(np.pi / 2)
        # return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace_mug.xml')
        return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace.xml')
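# Illustrative aside (an assumption, not code from this repo): the snippets that set
# reset_mocap_quat for the WSG gripper rely on a zangle_to_quat helper. A minimal
# sketch of the general idea, assuming the helper simply maps a rotation about the
# z-axis to a wxyz quaternion; the repo's version may compose an additional flip.
from pyquaternion import Quaternion
import numpy as np

def zangle_to_quat_sketch(zangle):
    # Rotation of `zangle` radians about the z-axis, returned as [w, x, y, z].
    return Quaternion(axis=[0, 0, 1], angle=zangle).elements

# zangle_to_quat_sketch(np.pi / 2) -> approximately [0.7071, 0, 0, 0.7071]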
def model_name(self):
    if self.structure == '3d':
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_3d.xml')
    elif self.structure == '2d':
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d.xml')
    elif self.structure == '2d_wall_short':
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_short.xml')
    elif self.structure == '2d_wall_tall':
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_tall.xml')
    elif self.structure == '2d_wall_tall_dark':
        return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place_2d_wall_tall_dark.xml')
    # Any other structure value falls through and returns None.
def model_name(self):
    # return get_asset_full_path('sawyer_xyz/sawyer_pickPlace.xml')
    # This is the reset_mocap_quat for WSG grippers.
    self.reset_mocap_quat = zangle_to_quat(np.pi / 2)
    # return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace_mug.xml')
    return get_asset_full_path('sawyer_xyz/sawyer_wsg_pickPlace.xml')
def __init__(
        self,
        presampled_positions='classic_mujoco/ant_goal_qpos_5x5_xy.npy',
):
    self.quick_init(locals())
    super().__init__(
        include_contact_forces_in_state=False,
        xml_path='classic_mujoco/ant_full_state_goal.xml',
    )
    self.goal_space = Box(
        self.observation_space.low[:15],
        self.observation_space.high[:15],
    )
    self.observation_space = Dict([
        ('observation', self.observation_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
    ])
    self.camera_init = create_camera_init(
        lookat=(0, 0, 0),
        distance=15,
        elevation=-45,
        # trackbodyid=self.sim.model.body_name2id('torso'),
    )
    self.presampled_qpos = np.load(
        get_asset_full_path(presampled_positions))
    self._goal = None
    idx = random.randint(0, len(self.presampled_qpos) - 1)
    self.goal = self.presampled_qpos[idx]
def model_name(self):
    return get_asset_full_path(
        'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden.xml'
    )
def __init__(
        self,
        use_low_gear_ratio=False,
        include_contact_forces_in_state=True,
        xml_path=None,
        max_steps=250,
):
    self.quick_init(locals())
    if xml_path is None:
        if use_low_gear_ratio:
            xml_path = 'classic_mujoco/low_gear_ratio_ant.xml'
        else:
            xml_path = 'classic_mujoco/ant.xml'
    super().__init__(
        get_asset_full_path(xml_path),
        frame_skip=5,
    )
    self.include_contact_forces_in_state = include_contact_forces_in_state
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low, high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.observation_space = Box(low, high)
    self.max_steps = max_steps
    self._current_steps = 0
def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.state_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    self.reset()
def model_name(self):
    # return get_asset_full_path('sawyer_xyz/sawyer_pickPlace.xml')
    # self.reset_mocap_quat = zangle_to_quat(np.pi/2)  # reset_mocap_quat for WSG grippers
    # self.reset_mocap_quat = zangle_to_quat(-np.pi/2)
    init_quat = [1, 0, 0, 1]
    self.reset_mocap_quat = (Quaternion(axis=[1, 0, 0], angle=-np.pi / 2)
                             * Quaternion(init_quat)).elements
    return get_asset_full_path('sawyer_xyz/sawyer_wsg_coffee.xml')
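# Illustrative check (an aside, not repo code): the composed reset_mocap_quat above is
# a -90 degree tilt about x applied on top of the initial wxyz orientation [1, 0, 0, 1].
# Evaluating the same expression with pyquaternion shows the constant it reduces to.
from pyquaternion import Quaternion
import numpy as np

init_quat = Quaternion([1, 0, 0, 1])                 # wxyz; unnormalized 90 deg rotation about z
tilt = Quaternion(axis=[1, 0, 0], angle=-np.pi / 2)  # -90 deg rotation about x
print((tilt * init_quat).elements)                   # ~[0.707, -0.707, 0.707, 0.707]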
def __init__(self, tasks=[{'goalPos': [0.2, 0]}], mode_1d=False,
             init_pos=[0, 0], goal_pos=[0.2, 0], mpl=200,
             change_task_every_episode=False, *args, **kwargs):
    self.quick_init(locals())
    model_name = get_asset_full_path('pointMass/point.xml')
    self.obj_init_pos = init_pos
    self.tasks = np.array(tasks)
    self.num_tasks = len(self.tasks)
    self.task = self.tasks[0]
    self.curr_path_length = 0
    self.max_path_length = mpl
    self.mode_1d = mode_1d
    self.change_task_every_episode = change_task_every_episode
    MujocoEnv.__init__(self, model_name, frame_skip=1,
                       automatically_set_spaces=True)
    # Serializable.__init__(self, *args, **kwargs)
    self.info_logKeys = ['targetDist']
def model_name(self):
    return get_asset_full_path(
        'sawyer_xyz/sawyer_button_press_topdown_wall.xml')
def __init__(self,
             block_low=(-0.2, 0.55, 0.02),
             block_high=(0.2, 0.75, 0.02),
             hand_low=(0.0, 0.55, 0.3),
             hand_high=(0.0, 0.55, 0.3),
             stack_goal_low=(-0.2, 0.55, 0.02),
             stack_goal_high=(0.2, 0.75, 0.02),
             fix_goal=False,
             fixed_stack_goal=(0.0, 0.55, 0.02),
             fixed_hand_goal=(0.0, 0.75, 0.3),
             use_sparse_reward=False,
             sparse_reward_threshold=0.05,
             reset_free=False,
             hide_goal_markers=False,
             oracle_reset_prob=0.0,
             xml_path='sawyer_xyz/three_blocks.xml',
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          model_name=get_asset_full_path(xml_path),
                          **kwargs)
    self.block_low = np.array(block_low)
    self.block_high = np.array(block_high)
    self.block_radius = 0.02
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    self.stack_goal_low = np.array(stack_goal_low)
    self.stack_goal_high = np.array(stack_goal_high)
    self.max_place_distance = max(
        np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
        np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
    )
    self.fix_goal = fix_goal
    self.fixed_stack_goal = np.array(fixed_stack_goal)
    self.use_sparse_reward = use_sparse_reward
    self.sparse_reward_threshold = sparse_reward_threshold
    self.reset_free = reset_free
    self.hide_goal_markers = hide_goal_markers
    self.oracle_reset_prob = oracle_reset_prob
    self.action_space = Box(np.array([-1, -1, -1, -1]),
                            np.array([1, 1, 1, 1]),
                            dtype=np.float32)
    diff = (self.block_high[0] - self.block_low[0]) / 3.0
    midpoint = diff + self.block_low[0]
    block_one_high = [midpoint, *self.block_high[1:]]
    block_two_low = [midpoint, *self.block_low[1:]]
    midpoint = midpoint + diff
    block_two_high = [midpoint, *self.block_high[1:]]
    block_three_low = [midpoint, *self.block_low[1:]]
    self.sampling_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, block_two_low,
                   block_three_low)),
        np.hstack(([0.04], self.hand_high, block_one_high, block_two_high,
                   self.block_high)),
        dtype=np.float32)
    self.gripper_and_hand_and_blocks_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, self.block_low,
                   self.block_low)),
        np.hstack(([0.04], self.hand_high, self.block_high, self.block_high,
                   self.block_high)),
        dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_blocks_space),
        ('desired_goal', self.gripper_and_hand_and_blocks_space),
        ('achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('state_observation', self.gripper_and_hand_and_blocks_space),
        ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_observation', self.gripper_and_hand_and_blocks_space),
        ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
    ])
    self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
    self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
    self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
    self.reset()
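# Worked numbers (illustrative only) for the default block_low/block_high above: the
# sampling space splits the block x-range into three equal bands so the three blocks
# are initialized left-to-right in non-overlapping bands.
import numpy as np

block_low, block_high = np.array([-0.2, 0.55, 0.02]), np.array([0.2, 0.75, 0.02])
diff = (block_high[0] - block_low[0]) / 3.0   # ~0.1333, width of each x band
m1 = block_low[0] + diff                      # ~-0.0667
m2 = m1 + diff                                # ~ 0.0667
# Block one samples x in [-0.2, m1], block two in [m1, m2], block three in [m2, 0.2];
# the y and z ranges are shared by all three blocks.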
def model_name(self):
    return get_asset_full_path('classic_mujoco/hopper.xml')
def __init__(self,
             puck_low=(-.4, .2),
             puck_high=(.4, 1),
             reward_type='state_distance',
             norm_order=1,
             indicator_threshold=0.06,
             hand_low=(-0.28, 0.3, 0.05),
             hand_high=(0.28, 0.9, 0.3),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
             goal_low=(-0.25, 0.3, 0.02, -.2, .4),
             goal_high=(0.25, 0.875, 0.02, .2, .8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             init_hand_xyz=(0, 0.4, 0.07),
             reset_free=False,
             xml_path='sawyer_xyz/sawyer_push_puck.xml',
             clamp_puck_on_step=False,
             puck_radius=.07,
             **kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                   np.hstack((self.hand_high, puck_high)),
                                   dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.init_hand_xyz = np.array(init_hand_xyz)
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def model_name(self):
    return get_asset_full_path('sawyer_xyz/sawyer_reach.xml')
def __init__(
        self,
        puck_low=(-.35, .25),
        puck_high=(.35, .95),
        norm_order=1,
        indicator_threshold=0.06,
        touch_threshold=0.1,  # I just chose this number after doing a few runs and looking at a histogram
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=True,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8),
        init_puck_z=0.035,
        init_hand_xyz=(0, 0.4, 0.07),
        reset_free=False,
        xml_path='sawyer_xyz/sawyer_push_puck.xml',
        clamp_puck_on_step=False,
        puck_radius=.07,
        **kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    self.goal_type = kwargs.pop('goal_type', 'puck')
    self.dense_reward = kwargs.pop('dense_reward', False)
    self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
    self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
    self.task_agnostic = kwargs.pop('task_agnostic', False)
    self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
    self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    self.puck_low = np.array(puck_low)
    self.puck_high = np.array(puck_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.norm_order = norm_order
    self.touch_threshold = touch_threshold
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = self.task_agnostic
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
        dtype=np.float32
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def model_name(self):
    return get_asset_full_path('sawyer_xyz/sawyer_dishRack.xml')
save_every = 10
render = False
positions = []
velocities = []
for _ in range(num_rollouts):
    env.reset()
    start_xy = sample_start_xy()
    # import ipdb; ipdb.set_trace()
    state = env.sim.get_state()
    state.qpos[:2] = start_xy
    env.sim.set_state(state)
    # env.sim.model.geom_pos[1:, 0:2] += start_xy
    for t in range(num_steps_per_rollout):
        env.step(env.action_space.sample())
        if t % save_every == 0:
            state = env.sim.get_state()
            positions.append(state.qpos.copy()[:15])
            velocities.append(state.qvel.copy()[:14])
        if render:
            env.render()
print(len(positions))
positions = np.array(positions)
positions_path = get_asset_full_path('classic_mujoco/ant_goal_qpos_5x5_xy.npy')
np.save(positions_path, positions)
velocities = np.array(velocities)
velocities_path = get_asset_full_path('classic_mujoco/ant_goal_qvel_5x5_xy.npy')
np.save(velocities_path, velocities)  # save the velocities, not the positions, to the qvel file
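# How the saved qpos array is consumed (mirroring the goal-hidden ant __init__ shown
# above; treating this as a standalone usage sketch is my assumption):
import random
import numpy as np

presampled_qpos = np.load(
    get_asset_full_path('classic_mujoco/ant_goal_qpos_5x5_xy.npy'))
goal_qpos = presampled_qpos[random.randint(0, len(presampled_qpos) - 1)]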
def model_name(self):
    return get_asset_full_path(
        'sawyer_xyz/sawyer_push_and_reach_mocap_goal_hidden_hurdle_horizontal.xml'
    )
def model_name(self):
    return get_asset_full_path('sawyer_xyz/sawyer_window_horizontal.xml')
def model_name(self):
    return get_asset_full_path('classic_mujoco/walker2d.xml')
def __init__(self,
             goal_low=(0,),
             goal_high=(1.0472,),
             action_reward_scale=0,
             reward_type='angle_difference',
             indicator_threshold=(.02, .03),
             fix_goal=False,
             fixed_goal=(-.25,),
             reset_free=False,
             fixed_hand_z=0.12,
             hand_low=(-0.1, 0.45, 0.15),
             hand_high=(0., 0.65, .225),
             target_pos_scale=1,
             target_angle_scale=1,
             min_angle=0,
             max_angle=1.0472,
             xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
             **sawyer_xyz_kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(self,
                          self.model_name,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          **sawyer_xyz_kwargs)
    MultitaskEnv.__init__(self)
    # self.initialize_camera(camera)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high),
                          dtype=np.float32)
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    # Ensure the env does not start in a weird position.
    self.reset_free = True
    self.reset()
    self.reset_free = reset_free
def __init__(
        self,
        reward_type='dense',
        count_bonus_coeff=None,
        n_bins=16,
        target_radius=0.5,  # Required distance to goal when using sparse reward
        norm_order=2,
        frame_skip=5,
        two_frames=False,
        vel_in_state=True,
        ant_low=list([-6, -6]),
        ant_high=list([6, 6]),
        goal_low=list([-6, -6]),
        goal_high=list([6, 6]),
        model_path='classic_mujoco/normal_gear_ratio_ant.xml',
        use_low_gear_ratio=False,
        goal_is_xy=False,
        goal_is_qpos=False,
        init_qpos=None,
        fixed_goal=None,
        diagnostics_goal=None,
        init_xy_mode='corner',
        terminate_when_unhealthy=False,
        healthy_z_range=(0.2, 0.9),
        # health_reward=10,
        done_penalty=0,
        goal_sampling_strategy='uniform',
        presampled_goal_paths='',
        *args,
        **kwargs):
    assert init_xy_mode in {
        'corner',
        'sample-uniformly-xy-space',
        'sample-from-goal-space',  # soon to be deprecated
    }
    assert not goal_is_xy or not goal_is_qpos
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    if use_low_gear_ratio:
        model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path(model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    if goal_is_xy:
        assert reward_type.startswith('xy')
    if init_qpos is not None:
        self.init_qpos[:len(init_qpos)] = np.array(init_qpos)
    self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
    self.reward_type = reward_type
    self.count_bonus_coeff = count_bonus_coeff
    self.n_bins = n_bins
    self.bin_counts = np.ones((self.n_bins, self.n_bins))
    self.target_radius = target_radius
    self.norm_order = norm_order
    self.goal_is_xy = goal_is_xy
    self.goal_is_qpos = goal_is_qpos
    self.fixed_goal = fixed_goal
    self.diagnostics_goal = diagnostics_goal
    print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
    self.init_xy_mode = init_xy_mode
    self.terminate_when_unhealthy = terminate_when_unhealthy
    # self._healthy_reward = health_reward
    self._done_penalty = done_penalty
    self._healthy_z_range = healthy_z_range
    self.model_path = model_path

    assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
    self.goal_sampling_strategy = goal_sampling_strategy
    if self.goal_sampling_strategy == 'presampled':
        assert presampled_goal_paths is not None
        if not osp.exists(presampled_goal_paths):
            presampled_goal_paths = get_asset_full_path(
                presampled_goal_paths)
        self.presampled_goals = np.load(presampled_goal_paths)
    else:
        self.presampled_goals = None

    self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
    goal_low, goal_high = np.array(goal_low), np.array(goal_high)
    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    if self.vel_in_state:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(27)))
            goal_space_high = np.concatenate((goal_high, np.ones(27)))
    else:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(13)))
            goal_space_high = np.concatenate((goal_high, np.ones(13)))

    if self.two_frames:
        self.obs_space = Box(
            np.concatenate((obs_space_low, obs_space_low)),
            np.concatenate((obs_space_high, obs_space_high)),
            dtype=np.float32)
        self.goal_space = Box(
            np.concatenate((goal_space_low, goal_space_low)),
            np.concatenate((goal_space_high, goal_space_high)),
            dtype=np.float32)
    else:
        self.obs_space = Box(obs_space_low, obs_space_high,
                             dtype=np.float32)
        self.goal_space = Box(goal_space_low, goal_space_high,
                              dtype=np.float32)
    qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))
    spaces = [
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ]
    if self.goal_is_xy:
        self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
        spaces += [
            ('xy_observation', self.xy_obs_space),
            ('xy_desired_goal', self.xy_obs_space),
            ('xy_achieved_goal', self.xy_obs_space),
        ]
    if self.goal_is_qpos:
        spaces += [
            ('qpos_desired_goal', qpos_space),
            ('qpos_achieved_goal', qpos_space),
        ]
    self.observation_space = Dict(spaces)

    self._full_state_goal = None
    self._xy_goal = None
    self._qpos_goal = None
    self._prev_obs = None
    self._cur_obs = None
def model_name(self):
    return get_asset_full_path('sawyer_xyz/sawyer_pick_and_place.xml')
def model_name(self):
    # Remember to set the right limits in the base file
    # (the right line needs to be commented out).
    self.reset_mocap_quat = [1, 0, 1, 0]
    return get_asset_full_path('sawyer_xyz/multi_domain.xml')
def model_name(self):
    return get_asset_full_path('classic_mujoco/half_cheetah.xml')
def model_name(self):
    return get_asset_full_path(
        'sawyer_xyz/sawyer_reach_push_pick_and_place_wall.xml')