def __init__(self, reward_type='dense', norm_order=2, action_scale=0.15, frame_skip=100, ball_low=(-4.0, -4.0), ball_high=(4.0, 4.0), goal_low=(-4.0, -4.0), goal_high=(4.0, 4.0), model_path='pointmass_u_wall_big.xml', reset_low=None, reset_high=None, v_func_heatmap_bounds=(-2.0, 0.0), *args, **kwargs): self.quick_init(locals()) MultitaskEnv.__init__(self) MujocoEnv.__init__(self, model_path=get_asset_full_path('pointmass/' + model_path), frame_skip=frame_skip, **kwargs) self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32) self.action_scale = action_scale self.ball_radius = 0.50 self.walls = [ Wall(0, 1.0, 2.5, 0.5, self.ball_radius), Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius), Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius), ] self.reward_type = reward_type self.norm_order = norm_order self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high) self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high) if reset_low is None: self.reset_low = np.array(ball_low) else: self.reset_low = np.array(reset_low) if reset_high is None: self.reset_high = np.array(ball_high) else: self.reset_high = np.array(reset_high) obs_space_low = np.copy(self.ball_low) obs_space_high = np.copy(self.ball_high) goal_space_low = np.copy(self.goal_low) goal_space_high = np.copy(self.goal_high) self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32) self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32) self.observation_space = Dict([ ('observation', self.obs_space), ('desired_goal', self.goal_space), ('achieved_goal', self.obs_space), ('state_observation', self.obs_space), ('state_desired_goal', self.goal_space), ('state_achieved_goal', self.obs_space), ('proprio_observation', self.obs_space), ('proprio_desired_goal', self.goal_space), ('proprio_achieved_goal', self.obs_space), ]) self.v_func_heatmap_bounds = v_func_heatmap_bounds self._state_goal = None self.reset()
def __init__(self, reward_info=None, frame_skip=50, pos_action_scale=2. / 100, randomize_goals=True, hide_goal=False, init_block_low=(-0.02, 0.54), init_block_high=(-0.02, 0.54), puck_goal_low=(-0.05, 0.57), puck_goal_high=(0.05, 0.65), hand_goal_low=(-0.05, 0.55), hand_goal_high=(0.05, 0.65), fixed_puck_goal=(0.05, 0.6), fixed_hand_goal=(-0.05, 0.6), mocap_low=(-0.1, 0.5, 0.0), mocap_high=(0.1, 0.7, 0.5), force_puck_in_goal_space=False, puckInvisible=False): self.quick_init(locals()) self.reward_info = reward_info self.randomize_goals = randomize_goals self._pos_action_scale = pos_action_scale self.hide_goal = hide_goal self.puckInvisible = puckInvisible self.init_block_low = np.array(init_block_low) self.init_block_high = np.array(init_block_high) self.puck_goal_low = np.array(puck_goal_low) self.puck_goal_high = np.array(puck_goal_high) self.hand_goal_low = np.array(hand_goal_low) self.hand_goal_high = np.array(hand_goal_high) self.fixed_puck_goal = np.array(fixed_puck_goal) self.fixed_hand_goal = np.array(fixed_hand_goal) self.mocap_low = np.array(mocap_low) self.mocap_high = np.array(mocap_high) self.force_puck_in_goal_space = force_puck_in_goal_space self._goal_xyxy = self.sample_goal_xyxy() # MultitaskEnv.__init__(self, distance_metric_order=2) MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip) self.action_space = Box( np.array([-1, -1]), np.array([1, 1]), ) self.obs_box = Box( np.array([-0.2, 0.5, -0.2, 0.5]), np.array([0.2, 0.7, 0.2, 0.7]), ) goal_low = np.concatenate((self.hand_goal_low, self.puck_goal_low)) goal_high = np.concatenate((self.hand_goal_high, self.puck_goal_high)) self.goal_box = Box( goal_low, goal_high, ) self.observation_space = Dict([ ('observation', self.obs_box), ('state_observation', self.obs_box), ('desired_goal', self.goal_box), ('state_desired_goal', self.goal_box), ('achieved_goal', self.goal_box), ('state_achieved_goal', self.goal_box), ]) # hack for state-based experiments for other envs # self.observation_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) # self.goal_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) self.reset() self.reset_mocap_welds()
def __init__( self, obj_low=None, obj_high=None, hand_low=(-0.1, 0.55, 0.05), hand_high=(0.05, 0.65, 0.2), reward_type='hand_and_obj_distance', indicator_threshold=0.06, frame_skip = 50, obj_init_positions=((0, 0.6, 0.02),), random_init=False, num_objects=1, fix_goal=False, mocap_low=(-0.13, 0.57, 0.04), mocap_high=(0.08, 0.73, 0.2), action_scale=0.02, fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6), goal_low=None, goal_high=None, reset_free=False, filename='sawyer_pick_and_place_multiobj.xml', object_mass=1, # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'], object_meshes=None, obj_classname = None, block_height=0.04, block_width = 0.04, cylinder_radius = 0.015, finger_sensors=False, maxlen=0.06, minlen=0.01, preload_obj_dict=None, hide_goal_markers=False, oracle_reset_prob=0.0, presampled_goals=None, num_goals_presampled=2, p_obj_in_hand=1, **kwargs ): self.quick_init(locals()) MultitaskEnv.__init__(self) base_filename = asset_base_path + filename friction_params = (1, 1, 2) self.obj_stat_prop = create_object_xml(base_filename, num_objects, object_mass, friction_params, object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict, obj_classname, block_height, block_width, cylinder_radius) gen_xml = create_root_xml(base_filename) MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip) self.reset_mocap_welds() clean_xml(gen_xml) self.mocap_low = mocap_low self.mocap_high = mocap_high self.action_scale = action_scale self.hand_low = np.array(hand_low) self.hand_high = np.array(hand_high) if obj_low is None: obj_low = self.hand_low if obj_high is None: obj_high = self.hand_high self.obj_low = obj_low self.obj_high = obj_high if goal_low is None: goal_low = np.hstack((self.hand_low, obj_low)) if goal_high is None: goal_high = np.hstack((self.hand_high, obj_high)) self.reward_type = reward_type self.random_init = random_init self.p_obj_in_hand = p_obj_in_hand self.indicator_threshold = indicator_threshold self.obj_init_z = obj_init_positions[0][2] self.obj_init_positions = np.array(obj_init_positions) self.last_obj_pos = self.obj_init_positions[0] self.fix_goal = fix_goal self.fixed_goal = np.array(fixed_goal) self._state_goal = None self.reset_free = reset_free self.oracle_reset_prob = oracle_reset_prob self.obj_ind_to_manip = 0 self.hide_goal_markers = hide_goal_markers self.action_space = Box( np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32 ) self.hand_and_obj_space = Box( np.hstack((self.hand_low, obj_low)), np.hstack((self.hand_high, obj_high)), dtype=np.float32 ) self.hand_space = Box( self.hand_low, self.hand_high, dtype=np.float32 ) self.gripper_and_hand_and_obj_space = Box( np.hstack(([0.0], self.hand_low, obj_low)), np.hstack(([0.04], self.hand_high, obj_high)), dtype=np.float32 ) self.num_objects = num_objects self.maxlen = maxlen self.observation_space = Dict([ ('observation', self.gripper_and_hand_and_obj_space), ('desired_goal', self.hand_and_obj_space), ('achieved_goal', self.hand_and_obj_space), ('state_observation', self.gripper_and_hand_and_obj_space), ('state_desired_goal', self.hand_and_obj_space), ('state_achieved_goal', self.hand_and_obj_space), ('proprio_observation', self.hand_space), ('proprio_desired_goal', self.hand_space), ('proprio_achieved_goal', self.hand_space), ]) self.hand_reset_pos = np.array([0, .6, .2]) if presampled_goals is not None: self._presampled_goals = presampled_goals self.num_goals_presampled = len(list(self._presampled_goals.values)[0]) else: # presampled_goals will be created when sample_goal is first called self._presampled_goals = None self.num_goals_presampled = num_goals_presampled self.num_goals_presampled = 10 self.picked_up_object = False self.train_pickups = 0 self.eval_pickups = 0 self.cur_mode = 'train' self.reset()
def __init__(self, model_name, frame_skip=50): MujocoEnv.__init__(self, model_name, frame_skip=frame_skip) self.reset_mocap_welds()
def __init__( self, reward_info=None, frame_skip=50, pos_action_scale=4. / 100, randomize_goals=True, puck_goal_low=(-0.1, 0.5), puck_goal_high=(0.1, 0.7), hand_goal_low=(-0.1, 0.5), hand_goal_high=(0.1, 0.7), mocap_low=(-0.1, 0.5, 0.0), mocap_high=(0.1, 0.7, 0.5), # unused init_block_low=(-0.05, 0.55), init_block_high=(0.05, 0.65), fixed_puck_goal=(0.05, 0.6), fixed_hand_goal=(-0.05, 0.6), # multi-object num_objects=1, fixed_colors=True, seed=None, filename='sawyer_multiobj.xml', object_mass=1, # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'], object_meshes=None, obj_classname=None, block_height=0.02, block_width=0.02, cylinder_radius=0.05, finger_sensors=False, maxlen=0.06, minlen=0.01, preload_obj_dict=None, reset_to_initial_position=True, object_low=(-np.inf, -np.inf, -np.inf), object_high=(np.inf, np.inf, np.inf), action_repeat=1, fixed_start=True, fixed_start_pos=(0, 0.6), goal_moves_one_object=False, num_scene_objects=None, # list of number of objects that can appear per scene object_height=0.02, use_textures=False, init_camera=None, sliding_joints=False, ): if seed: np.random.seed(seed) self.env_seed = seed self.quick_init(locals()) self.reward_info = reward_info self.randomize_goals = randomize_goals self._pos_action_scale = pos_action_scale self.reset_to_initial_position = reset_to_initial_position self.init_block_low = np.array(init_block_low) self.init_block_high = np.array(init_block_high) self.puck_goal_low = np.array(puck_goal_low) self.puck_goal_high = np.array(puck_goal_high) self.hand_goal_low = np.array(hand_goal_low) self.hand_goal_high = np.array(hand_goal_high) self.fixed_puck_goal = np.array(fixed_puck_goal) self.fixed_hand_goal = np.array(fixed_hand_goal) self.mocap_low = np.array(mocap_low) self.mocap_high = np.array(mocap_high) self.object_low = np.array(object_low) self.object_high = np.array(object_high) self.action_repeat = action_repeat self.fixed_colors = fixed_colors self.goal_moves_one_object = goal_moves_one_object self.num_objects = num_objects self.num_scene_objects = num_scene_objects self.object_height = object_height self.fixed_start = fixed_start self.fixed_start_pos = np.array(fixed_start_pos) self.maxlen = maxlen self.use_textures = use_textures self.sliding_joints = sliding_joints self.cur_objects = [0] * num_objects self.preload_obj_dict = preload_obj_dict self.num_cur_objects = 0 # Generate XML base_filename = asset_base_path + filename friction_params = (0.1, 0.1, 0.02) self.obj_stat_prop = create_object_xml( base_filename, num_objects, object_mass, friction_params, object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict, obj_classname, block_height, block_width, cylinder_radius, use_textures, sliding_joints) gen_xml = create_root_xml(base_filename) MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip) clean_xml(gen_xml) if self.use_textures: self.modder = TextureModder(self.sim) self.state_goal = self.sample_goal_for_rollout() # MultitaskEnv.__init__(self, distance_metric_order=2) # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip) self.action_space = Box( np.array([-1, -1]), np.array([1, 1]), ) self.num_objects = num_objects low = (self.num_scene_objects[0] + 1) * [-0.2, 0.5] high = (self.num_scene_objects[0] + 1) * [0.2, 0.7] self.obs_box = Box( np.array(low), np.array(high), ) goal_low = np.array( low) # np.concatenate((self.hand_goal_low, self.puck_goal_low)) goal_high = np.array( high) # np.concatenate((self.hand_goal_high, self.puck_goal_high)) self.goal_box = Box( goal_low, goal_high, ) self.total_objects = self.num_objects + 1 self.objects_box = Box( np.zeros((self.total_objects, )), np.ones((self.total_objects, )), ) self.observation_space = Dict([ ('observation', self.obs_box), ('state_observation', self.obs_box), ('desired_goal', self.goal_box), ('state_desired_goal', self.goal_box), ('achieved_goal', self.goal_box), ('state_achieved_goal', self.goal_box), ('objects', self.objects_box), ]) # hack for state-based experiments for other envs # self.observation_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) # self.goal_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) self.set_initial_object_positions() if use_textures: super().initialize_camera(init_camera) self.initialized_camera = init_camera self.reset() self.reset_mocap_welds()
def __init__( self, frame_skip=50, action_scale=2. / 100, hand_low=(-0.2, 0.5), hand_high=(0.2, 0.7), puck_low=(-0.2, 0.5), puck_high=(0.2, 0.7), fix_goal=False, sample_realistic_goals=False, fixed_goal=(-0.05, 0.6, 0.05, 0.6), goal_low=None, goal_high=None, fix_reset=False, fixed_reset=(0, 0.55, 0.0, 0.65), reset_low=None, reset_high=None, hand_z_position=0.06, puck_z_position=0.02, reward_type='state_distance', norm_order=2, indicator_threshold=0.06, num_mocap_calls_for_reset=250, square_puck=False, heavy_puck=False, invisible_boundary_wall=False, indicator_threshold_2=0.08, indicator_threshold_3=0.12, test_mode_case_num=None, ): self.quick_init(locals()) self.square_puck = square_puck self.heavy_puck = heavy_puck self.invisible_boundary_wall = invisible_boundary_wall if self.invisible_boundary_wall: model_name = get_asset_full_path( 'sawyer_xyz/sawyer_push_and_reach_nips_wall.xml') elif self.square_puck and self.heavy_puck: model_name = get_asset_full_path( 'sawyer_xyz/sawyer_push_and_reach_nips_square_heavy.xml') elif self.square_puck and not self.heavy_puck: model_name = get_asset_full_path( 'sawyer_xyz/sawyer_push_and_reach_nips_square.xml') elif not self.square_puck and self.heavy_puck: model_name = get_asset_full_path( 'sawyer_xyz/sawyer_push_and_reach_nips_heavy.xml') else: model_name = get_asset_full_path( 'sawyer_xyz/sawyer_push_and_reach_nips.xml') MujocoEnv.__init__(self, model_name, frame_skip=frame_skip) hand_low = np.array(hand_low) hand_high = np.array(hand_high) mocap_low = hand_low mocap_high = hand_high self.mocap_low = np.hstack((mocap_low, np.array([0.0]))) self.mocap_high = np.hstack((mocap_high, np.array([0.5]))) puck_low = np.array(puck_low) puck_high = np.array(puck_high) if self.square_puck: self.puck_radius = np.sqrt(2) * 0.04 else: self.puck_radius = 0.04 self.ee_radius = 0.015 # puck_low += (self.puck_radius + self.ee_radius) # puck_high -= (self.puck_radius + self.ee_radius) # print(hand_low) # print(hand_high) # print(puck_low) # print(puck_high) self.obs_space = Box(np.hstack((hand_low, puck_low)), np.hstack((hand_high, puck_high)), dtype=np.float32) self.hand_space = Box(hand_low, hand_high, dtype=np.float32) self.puck_space = Box(puck_low, puck_high, dtype=np.float32) if goal_low is None: goal_low = self.obs_space.low.copy() if goal_high is None: goal_high = self.obs_space.high.copy() if reset_low is None: reset_low = self.obs_space.low.copy() if reset_high is None: reset_high = self.obs_space.high.copy() goal_low = np.array(goal_low) goal_high = np.array(goal_high) reset_low = np.array(reset_low) reset_high = np.array(reset_high) self.goal_space = Box(goal_low, goal_high, dtype=np.float32) self.reset_space = Box(reset_low, reset_high, dtype=np.float32) self.observation_space = Dict([ ('observation', self.obs_space), ('desired_goal', self.goal_space), ('achieved_goal', self.obs_space), ('state_observation', self.obs_space), ('state_desired_goal', self.goal_space), ('state_achieved_goal', self.obs_space), ('proprio_observation', self.hand_space), ('proprio_desired_goal', Box(goal_low[:2], goal_high[:2], dtype=np.float32)), ('proprio_achieved_goal', self.hand_space), ]) self.num_mocap_calls_for_reset = num_mocap_calls_for_reset self.fix_reset = fix_reset self.sample_realistic_goals = sample_realistic_goals self.fixed_reset = np.array(fixed_reset) self.fix_goal = fix_goal self.fixed_goal = np.array(fixed_goal) self._state_goal = None self.test_mode_case_num = test_mode_case_num assert self.test_mode_case_num in [None, 1, 2, 3, 4, 5] if self.test_mode_case_num is not None: self.fix_reset = True self.fix_goal = True if self.test_mode_case_num == 1: self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55]) self.fixed_goal = np.array([-0.20, 0.50, 0.20, 0.70]) elif self.test_mode_case_num == 2: self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55]) self.fixed_goal = np.array([0.15, 0.65, 0.20, 0.70]) elif self.test_mode_case_num == 3: self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55]) self.fixed_goal = np.array([0.20, 0.70, -0.20, 0.70]) elif self.test_mode_case_num == 4: self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55]) self.fixed_goal = np.array([0.20, 0.50, -0.20, 0.70]) elif self.test_mode_case_num == 5: self.fixed_reset = np.array([0.20, 0.70, -0.15, 0.55]) self.fixed_goal = np.array([0.20, 0.50, 0.20, 0.70]) self.reward_type = reward_type self.norm_order = norm_order self.indicator_threshold = indicator_threshold self.indicator_threshold_2 = indicator_threshold_2 self.indicator_threshold_3 = indicator_threshold_3 self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32) self._action_scale = action_scale self.hand_z_position = hand_z_position self.puck_z_position = puck_z_position self.reset_counter = 0 self.reset()
def __init__( self, reward_info=None, frame_skip=50, pos_action_scale=2. / 100, randomize_goals=True, puck_goal_low=(-0.1, 0.5), puck_goal_high=(0.1, 0.7), hand_goal_low=(-0.1, 0.5), hand_goal_high=(0.1, 0.7), mocap_low=(-0.1, 0.5, 0.0), mocap_high=(0.1, 0.7, 0.5), # unused init_block_low=(-0.05, 0.55), init_block_high=(0.05, 0.65), fixed_puck_goal=(0.05, 0.6), fixed_hand_goal=(-0.05, 0.6), # multi-object num_objects=1, filename='sawyer_multiobj.xml', object_mass=1, # object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'], object_meshes=None, obj_classname=None, block_height=0.02, block_width=0.02, cylinder_radius=0.05, finger_sensors=False, maxlen=0.06, minlen=0.01, preload_obj_dict=None, reset_to_initial_position=True, object_low=(-0.1, 0.5, 0.0), object_high=(0.1, 0.7, 0.5), action_repeat=1, fixed_start=True, ): self.quick_init(locals()) self.reward_info = reward_info self.randomize_goals = randomize_goals self._pos_action_scale = pos_action_scale self.reset_to_initial_position = reset_to_initial_position self.init_block_low = np.array(init_block_low) self.init_block_high = np.array(init_block_high) self.puck_goal_low = np.array(puck_goal_low) self.puck_goal_high = np.array(puck_goal_high) self.hand_goal_low = np.array(hand_goal_low) self.hand_goal_high = np.array(hand_goal_high) self.fixed_puck_goal = np.array(fixed_puck_goal) self.fixed_hand_goal = np.array(fixed_hand_goal) self.mocap_low = np.array(mocap_low) self.mocap_high = np.array(mocap_high) self.object_low = np.array(object_low) self.object_high = np.array(object_high) self.action_repeat = action_repeat self.num_objects = num_objects self.fixed_start = fixed_start self.maxlen = maxlen # Generate XML base_filename = asset_base_path + filename friction_params = (0.1, 0.1, 0.02) self.obj_stat_prop = create_object_xml( base_filename, num_objects, object_mass, friction_params, object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict, obj_classname, block_height, block_width, cylinder_radius) gen_xml = create_root_xml(base_filename) MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip) clean_xml(gen_xml) self.state_goal = self.sample_goal_xyxy() # MultitaskEnv.__init__(self, distance_metric_order=2) # MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip) self.action_space = Box( np.array([-1, -1]), np.array([1, 1]), ) self.num_objects = num_objects low = (self.num_objects + 1) * [-0.2, 0.5] high = (self.num_objects + 1) * [0.2, 0.7] self.obs_box = Box( np.array(low), np.array(high), ) goal_low = np.array( low) # np.concatenate((self.hand_goal_low, self.puck_goal_low)) goal_high = np.array( high) # np.concatenate((self.hand_goal_high, self.puck_goal_high)) self.goal_box = Box( goal_low, goal_high, ) self.observation_space = Dict([ ('observation', self.obs_box), ('state_observation', self.obs_box), ('desired_goal', self.goal_box), ('state_desired_goal', self.goal_box), ('achieved_goal', self.goal_box), ('state_achieved_goal', self.goal_box), ]) # hack for state-based experiments for other envs # self.observation_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) # self.goal_space = Box( # np.array([-0.2, 0.5, -0.2, 0.5, -0.2, 0.5]), # np.array([0.2, 0.7, 0.2, 0.7, 0.2, 0.7]), # ) self.set_initial_object_positions() self.reset() self.reset_mocap_welds()
def __init__( self, reward_type='state_distance', norm_order=2, action_scale=20, frame_skip=3, two_frames=False, vel_in_state=True, z_in_state=True, car_low=(-1.90, -1.90), #list([-1.60, -1.60]), car_high=(1.90, 1.90), #list([1.60, 1.60]), goal_low=(-1.90, -1.90), #list([-1.60, -1.60]), goal_high=(1.90, 1.90), #list([1.60, 1.60]), model_path='wheeled_car.xml', reset_low=None, reset_high=None, *args, **kwargs): self.quick_init(locals()) MultitaskEnv.__init__(self) MujocoEnv.__init__(self, model_path=get_asset_full_path('locomotion/' + model_path), frame_skip=frame_skip, **kwargs) self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32) self.action_scale = action_scale # set radius if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']: self.car_radius = np.sqrt(2) * 0.34 elif model_path == 'wheeled_car_old.xml': self.car_radius = 0.30 else: raise NotImplementedError # set walls if model_path == 'wheeled_car_u_wall.xml': self.walls = [ Wall(0, -0.5, 0.75, 0.05, self.car_radius), Wall(0.70, 0.05, 0.05, 0.5, self.car_radius), Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius), ] else: self.walls = [] self.reward_type = reward_type self.norm_order = norm_order self.car_low, self.car_high = np.array(car_low), np.array(car_high) self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high) if reset_low is None: self.reset_low = np.array(car_low) else: self.reset_low = np.array(reset_low) if reset_high is None: self.reset_high = np.array(car_high) else: self.reset_high = np.array(reset_high) self.car_low += self.car_radius self.car_high -= self.car_radius self.goal_low += self.car_radius self.goal_high -= self.car_radius self.reset_low += self.car_radius self.reset_high -= self.car_radius self.two_frames = two_frames self.vel_in_state = vel_in_state self.z_in_state = z_in_state # x and y pos obs_space_low = np.copy(self.car_low) obs_space_high = np.copy(self.car_high) goal_space_low = np.copy(self.goal_low) goal_space_high = np.copy(self.goal_high) # z pos if self.z_in_state: obs_space_low = np.concatenate((obs_space_low, [-1])) obs_space_high = np.concatenate((obs_space_high, [0.03])) goal_space_low = np.concatenate((goal_space_low, [0])) goal_space_high = np.concatenate((goal_space_high, [0])) # sin and cos obs_space_low = np.concatenate((obs_space_low, [-1, -1])) obs_space_high = np.concatenate((obs_space_high, [1, 1])) goal_space_low = np.concatenate((goal_space_low, [-1, -1])) goal_space_high = np.concatenate((goal_space_high, [1, 1])) if self.vel_in_state: # x and y vel obs_space_low = np.concatenate((obs_space_low, [-10, -10])) obs_space_high = np.concatenate((obs_space_high, [10, 10])) goal_space_low = np.concatenate((goal_space_low, [0, 0])) goal_space_high = np.concatenate((goal_space_high, [0, 0])) # z vel if self.z_in_state: obs_space_low = np.concatenate((obs_space_low, [-10])) obs_space_high = np.concatenate((obs_space_high, [10])) goal_space_low = np.concatenate((goal_space_low, [0])) goal_space_high = np.concatenate((goal_space_high, [0])) # theta vel obs_space_low = np.concatenate((obs_space_low, [-10])) obs_space_high = np.concatenate((obs_space_high, [10])) goal_space_low = np.concatenate((goal_space_low, [0])) goal_space_high = np.concatenate((goal_space_high, [0])) self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32) self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32) if self.two_frames: self.obs_space = concatenate_box_spaces(self.obs_space, self.obs_space) self.goal_space = concatenate_box_spaces(self.goal_space, self.goal_space) self.observation_space = Dict([ ('observation', self.obs_space), ('desired_goal', self.goal_space), ('achieved_goal', self.obs_space), ('state_observation', self.obs_space), ('state_desired_goal', self.goal_space), ('state_achieved_goal', self.obs_space), ('proprio_observation', self.obs_space), ('proprio_desired_goal', self.goal_space), ('proprio_achieved_goal', self.obs_space), ]) self._state_goal = None self._cur_obs = None self._prev_obs = None self.reset()
def __init__( self, # Room room, # Start and Goal start_config="all", goal_config="all", # Reward potential_type='', shaped=False, base_reward='', # State and Goal Representations state_embedding=None, goal_embedding=None, max_path_length=None, *args, **kwargs): # Initialize Serializable.quick_init(self, locals()) MultitaskEnv.__init__(self) # Environment Configuration self._room = room model = self.room.get_mjcmodel() self.possible_positions = self.room.XY(n=50) with model.asfile() as f: MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP) # Initialization self.start_config = start_config self.baseline_start = self.room.get_start() self.start = np.zeros_like(self.baseline_start) self.goal_config = goal_config self.baseline_goal = self.room.get_target() self.goal = np.zeros_like(self.baseline_goal) # Time Limit self.curr_path_length = 0 if max_path_length is None: self.max_path_length = self.MAX_PATH_LENGTH else: self.max_path_length = max_path_length # Reward Functions self.potential_type = potential_type self.shaped = shaped self.base_reward = base_reward # State Representations self.state_embedding = state_embedding if state_embedding is not None else IdentityEmbedding( ) self.goal_embedding = goal_embedding if goal_embedding is not None else IdentityEmbedding( ) # Action Space bounds = self.model.actuator_ctrlrange.copy() self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1]) # Observation Space example_state_obs = self._get_env_obs() example_obs = self.state_embedding.get_embedding(example_state_obs) state_obs_size = example_obs.shape[0] obs_size = example_obs.shape[0] self.obs_space = Box(-1 * np.inf * np.ones(obs_size), np.inf * np.ones(obs_size)) self.state_obs_space = Box(-1 * np.inf * np.ones(state_obs_size), np.inf * np.ones(state_obs_size)) # Goal Space example_goal = self._get_env_achieved_goal(example_state_obs) state_goal_size = example_goal.shape[0] goal_size = self.goal_embedding.get_embedding(example_goal).shape[0] self.goal_space = Box(-1 * np.inf * np.ones(goal_size), np.inf * np.ones(goal_size)) self.state_goal_space = Box(-1 * np.inf * np.ones(state_goal_size), np.inf * np.ones(state_goal_size)) # Final Setup self.observation_space = Dict([ ('observation', self.obs_space), ('desired_goal', self.goal_space), ('achieved_goal', self.goal_space), ('state_observation', self.state_obs_space), ('state_desired_goal', self.state_goal_space), ('state_achieved_goal', self.state_goal_space), ]) self.reset()