def __init__(
        self,
        reward_type='state_distance',
        norm_order=2,
        action_scale=20,
        frame_skip=3,
        two_frames=False,
        vel_in_state=True,
        z_in_state=True,
        car_low=(-1.90, -1.90),    #list([-1.60, -1.60]),
        car_high=(1.90, 1.90),     #list([1.60, 1.60]),
        goal_low=(-1.90, -1.90),   #list([-1.60, -1.60]),
        goal_high=(1.90, 1.90),    #list([1.60, 1.60]),
        model_path='wheeled_car.xml',
        reset_low=None,
        reset_high=None,
        *args,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(
        self,
        model_path=get_asset_full_path('locomotion/' + model_path),
        frame_skip=frame_skip,
        **kwargs
    )

    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32)
    self.action_scale = action_scale

    # set radius
    if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
        self.car_radius = np.sqrt(2) * 0.34
    elif model_path == 'wheeled_car_old.xml':
        self.car_radius = 0.30
    else:
        raise NotImplementedError

    # set walls
    if model_path == 'wheeled_car_u_wall.xml':
        self.walls = [
            Wall(0, -0.5, 0.75, 0.05, self.car_radius),
            Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
            Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
        ]
    else:
        self.walls = []

    self.reward_type = reward_type
    self.norm_order = norm_order

    self.car_low, self.car_high = np.array(car_low), np.array(car_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(car_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(car_high)
    else:
        self.reset_high = np.array(reset_high)

    self.car_low += self.car_radius
    self.car_high -= self.car_radius
    self.goal_low += self.car_radius
    self.goal_high -= self.car_radius
    self.reset_low += self.car_radius
    self.reset_high -= self.car_radius

    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    self.z_in_state = z_in_state

    # x and y pos
    obs_space_low = np.copy(self.car_low)
    obs_space_high = np.copy(self.car_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)

    # z pos
    if self.z_in_state:
        obs_space_low = np.concatenate((obs_space_low, [-1]))
        obs_space_high = np.concatenate((obs_space_high, [0.03]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    # sin and cos
    obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
    obs_space_high = np.concatenate((obs_space_high, [1, 1]))
    goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
    goal_space_high = np.concatenate((goal_space_high, [1, 1]))

    if self.vel_in_state:
        # x and y vel
        obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
        obs_space_high = np.concatenate((obs_space_high, [10, 10]))
        goal_space_low = np.concatenate((goal_space_low, [0, 0]))
        goal_space_high = np.concatenate((goal_space_high, [0, 0]))

        # z vel
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))

        # theta vel
        obs_space_low = np.concatenate((obs_space_low, [-10]))
        obs_space_high = np.concatenate((obs_space_high, [10]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    if self.two_frames:
        self.obs_space = concatenate_box_spaces(self.obs_space, self.obs_space)
        self.goal_space = concatenate_box_spaces(self.goal_space, self.goal_space)

    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])

    self._state_goal = None
    self._cur_obs = None
    self._prev_obs = None
    self.reset()
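# Usage sketch (not part of the original listing): the constructor above builds a
# goal-conditioned wheeled-car environment with a Dict observation space. The class
# name `WheeledCarEnv` is an assumption, since only __init__ is shown; the keyword
# arguments and the reset()/action_space attributes come from the constructor and the
# MujocoEnv/MultitaskEnv base classes it calls.

env = WheeledCarEnv(
    model_path='wheeled_car_u_wall.xml',  # enables the three U-shaped walls
    two_frames=True,                      # doubles the observation and goal spaces
)
obs = env.reset()
for _ in range(50):
    # actions are 2-D and bounded in [-1, 1]; `action_scale` is presumably applied in step()
    obs, reward, done, info = env.step(env.action_space.sample())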
def __init__(
        self,
        wrapped_env,
        imsize=84,
        init_camera=None,
        transpose=False,
        grayscale=False,
        normalize=False,
        reward_type='wrapped_env',
        threshold=10,
        image_length=None,
        presampled_goals=None,
        non_presampled_goal_img_is_garbage=False,
        recompute_reward=True,
):
    """
    :param wrapped_env:
    :param imsize:
    :param init_camera:
    :param transpose:
    :param grayscale:
    :param normalize:
    :param reward_type:
    :param threshold:
    :param image_length:
    :param presampled_goals:
    :param non_presampled_goal_img_is_garbage: Set this option to True if you
        want to allow the code to work without presampled goals, but where the
        underlying env doesn't support set_to_goal. As the name implies, this
        will make the goal image garbage if you don't provide pre-sampled
        goals. The main use case is if you want to use an ImageEnv to
        pre-sample a bunch of goals.
    """
    self.quick_init(locals())
    super().__init__(wrapped_env)
    self.imsize = imsize
    self.init_camera = init_camera
    self.transpose = transpose
    self.grayscale = grayscale
    self.normalize = normalize
    self.recompute_reward = recompute_reward
    self.non_presampled_goal_img_is_garbage = non_presampled_goal_img_is_garbage

    if image_length is not None:
        self.image_length = image_length
    else:
        if grayscale:
            self.image_length = self.imsize * self.imsize
        else:
            self.image_length = 3 * self.imsize * self.imsize
    self.channels = 1 if grayscale else 3

    # This is torch format rather than PIL image
    self.image_shape = (self.imsize, self.imsize)
    # Flattened past image queue
    # init camera
    if init_camera is not None:
        self._wrapped_env.initialize_camera(init_camera)
        # viewer = mujoco_py.MjRenderContextOffscreen(sim, device_id=-1)
        # init_camera(viewer.cam)
        # sim.add_render_context(viewer)

    img_space = Box(0, 1, (self.image_length,), dtype=np.float32)
    self._img_goal = img_space.sample()  # has to be done for presampling
    spaces = self.wrapped_env.observation_space.spaces.copy()
    spaces['observation'] = img_space
    spaces['desired_goal'] = img_space
    spaces['achieved_goal'] = img_space
    spaces['image_observation'] = img_space
    spaces['image_desired_goal'] = img_space
    spaces['image_achieved_goal'] = img_space

    self.return_image_proprio = False
    if 'proprio_observation' in spaces.keys():
        self.return_image_proprio = True
        spaces['image_proprio_observation'] = concatenate_box_spaces(
            spaces['image_observation'],
            spaces['proprio_observation'],
        )
        spaces['image_proprio_desired_goal'] = concatenate_box_spaces(
            spaces['image_desired_goal'],
            spaces['proprio_desired_goal'],
        )
        spaces['image_proprio_achieved_goal'] = concatenate_box_spaces(
            spaces['image_achieved_goal'],
            spaces['proprio_achieved_goal'],
        )

    self.observation_space = Dict(spaces)
    self.action_space = self.wrapped_env.action_space
    self.reward_type = reward_type
    self.threshold = threshold
    self._presampled_goals = presampled_goals
    if self._presampled_goals is None:
        self.num_goals_presampled = 0
    else:
        self.num_goals_presampled = \
            presampled_goals[random.choice(list(presampled_goals))].shape[0]
    self._last_image = None
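# Usage sketch (not part of the original listing): wrapping a state-based environment
# so that observations and goals become rendered, flattened images. The wrapper class
# is referred to as `ImageEnv` here and `my_camera_init` / `base_env` are placeholders;
# those names are assumptions, while the keyword arguments and the 'image_observation'
# key come from the constructor above.

image_env = ImageEnv(
    wrapped_env=base_env,        # any goal-conditioned env exposing a Dict observation space
    imsize=84,
    init_camera=my_camera_init,  # hypothetical callable passed to initialize_camera()
    transpose=True,
    normalize=True,
    reward_type='wrapped_env',
)
obs = image_env.reset()
flat_img = obs['image_observation']  # flat array of length 3 * 84 * 84 (RGB) or 84 * 84 (grayscale)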
def __init__(
        self,
        wrapped_env,
        imsize=84,
        init_camera=None,
        transpose=False,
        grayscale=False,
        normalize=False,
        reward_type='wrapped_env',
        threshold=10,
        image_length=None,
        presampled_goals=None,
):
    self.quick_init(locals())
    super().__init__(wrapped_env)
    self.wrapped_env.hide_goal_markers = True
    self.imsize = imsize
    self.init_camera = init_camera
    self.transpose = transpose
    self.grayscale = grayscale
    self.normalize = normalize

    if image_length is not None:
        self.image_length = image_length
    else:
        if grayscale:
            self.image_length = self.imsize * self.imsize
        else:
            self.image_length = 3 * self.imsize * self.imsize
    self.channels = 1 if grayscale else 3

    # This is torch format rather than PIL image
    self.image_shape = (self.imsize, self.imsize)
    # Flattened past image queue
    # init camera
    if init_camera is not None:
        sim = self._wrapped_env.initialize_camera(init_camera)
        # viewer = mujoco_py.MjRenderContextOffscreen(sim, device_id=-1)
        # init_camera(viewer.cam)
        # sim.add_render_context(viewer)
    self._render_local = False

    img_space = Box(0, 1, (self.image_length,))
    self._img_goal = img_space.sample()  # has to be done for presampling
    spaces = self.wrapped_env.observation_space.spaces
    spaces['observation'] = img_space
    spaces['desired_goal'] = img_space
    spaces['achieved_goal'] = img_space
    spaces['image_observation'] = img_space
    spaces['image_desired_goal'] = img_space
    spaces['image_achieved_goal'] = img_space

    self.return_image_proprio = False
    if 'proprio_observation' in spaces.keys():
        self.return_image_proprio = True
        spaces['image_proprio_observation'] = concatenate_box_spaces(
            spaces['image_observation'],
            spaces['proprio_observation'],
        )
        spaces['image_proprio_desired_goal'] = concatenate_box_spaces(
            spaces['image_desired_goal'],
            spaces['proprio_desired_goal'],
        )
        spaces['image_proprio_achieved_goal'] = concatenate_box_spaces(
            spaces['image_achieved_goal'],
            spaces['proprio_achieved_goal'],
        )

    self.observation_space = Dict(spaces)
    self.action_space = self.wrapped_env.action_space
    self.reward_type = reward_type
    self.threshold = threshold
    self._presampled_goals = presampled_goals
    if self._presampled_goals is None:
        self.num_goals_presampled = 0
    else:
        self.num_goals_presampled = \
            presampled_goals[random.choice(list(presampled_goals))].shape[0]