def test_action_spaces(simple_env: SimpleHybrid):
    """Check that both MLP policies refuse a ``OneHotHybrid`` action space.

    Each policy class is constructed with the same observation space, the
    same ``OneHotHybrid`` action space and the same linear schedule (used
    for both schedule arguments); construction is expected to raise
    ``AssertionError``.

    :param simple_env: Fixture argument; it is not used inside the test body.
    """
    lr_schedule = get_linear_fn(1, 0.1, 0.2)
    obs_space = Box(low=-1, high=143, shape=(3, ))
    rejected_action_space = OneHotHybrid([
        Box(low=-1, high=143, shape=(1, )),
        Box(low=1, high=1.2, shape=(2, )),
    ])

    # Same expectation for both policies, checked in the original order.
    for policy_class in (PDQNMlpPolicy, MPDQNMlpPolicy):
        with pytest.raises(AssertionError):
            policy_class(obs_space, rejected_action_space,
                         lr_schedule, lr_schedule)
def __init__(self, observation_space: spaces.Box, features_dim: int = 512):
    """Create the convolutional stack and the final linear projection.

    :param observation_space: Observation space; it must pass
        ``is_image_space`` and its first shape entry is used as the number
        of input channels.
    :param features_dim: Output size of the final linear layer.
    """
    super(NatureCNN, self).__init__(observation_space, features_dim)
    # Inputs are assumed to be channels-first (CxHxW); any re-ordering is
    # left to pre-processing or a wrapper.
    assert is_image_space(observation_space), (
        "You should use NatureCNN "
        f"only with images not with {observation_space}\n"
        "(you are probably using `CnnPolicy` instead of `MlpPolicy`)\n"
        "If you are using a custom environment,\n"
        "please check it using our env checker:\n"
        "https://stable-baselines3.readthedocs.io/en/master/common/env_checker.html"
    )

    in_channels = observation_space.shape[0]
    conv_layers = [
        nn.Conv2d(in_channels, 32, kernel_size=8, stride=4, padding=0),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
        nn.ReLU(),
        nn.Flatten(),
    ]
    self.cnn = nn.Sequential(*conv_layers)

    # The flattened size is measured by pushing one sampled observation
    # (with a leading batch axis added) through the stack.
    with th.no_grad():
        probe = th.as_tensor(observation_space.sample()[None]).float()
        flat_size = self.cnn(probe).shape[1]

    self.linear = nn.Sequential(
        nn.Linear(flat_size, features_dim),
        nn.ReLU(),
    )
def make_box(low: Optional[List] = None,
             high: Optional[List] = None,
             shape: Optional[Tuple] = None) -> Box:
    """Create a ``Box`` from optional bounds and an optional shape.

    With an explicit ``shape``, a missing bound defaults to the scalar
    ``-inf`` / ``+inf`` and ``Box(low, high, shape)`` is returned.

    Without a ``shape``, the bounds are converted to arrays and
    ``Box(low, high)`` is returned. If only one bound is supplied, the
    other is filled with ``-inf`` / ``+inf`` in the shape of the supplied
    bound.

    :param low: Lower bound(s), or ``None`` for unbounded below.
    :param high: Upper bound(s), or ``None`` for unbounded above.
    :param shape: Shape of the box, or ``None`` to take it from the bounds.
    :return: The constructed ``Box``.
    :raises ValueError: If ``low``, ``high`` and ``shape`` are all ``None``.
    """
    if shape is not None:
        return Box(
            -np.inf if low is None else low,
            np.inf if high is None else high,
            shape,
        )

    if low is None and high is None:
        raise ValueError("Some value needs to be not none")

    # A single missing bound must not reach ``np.array(None)``: that yields
    # a 0-d object array which does not describe a usable bound.
    if low is None:
        high = np.array(high)
        low = np.full(high.shape, -np.inf)
    elif high is None:
        low = np.array(low)
        high = np.full(low.shape, np.inf)
    else:
        low = np.array(low)
        high = np.array(high)
    return Box(low, high)
def __init__(
    self,
    observation_space: Space,
    action_space: SimpleHybrid,
    lr_schedule_q: Schedule,
    lr_schedule_parameter: Schedule,
    net_arch_q: Optional[List[int]] = None,
    net_arch_parameter: Optional[List[int]] = None,
    activation_fn: Type[nn.Module] = nn.ReLU,
    features_extractor_class: Type[
        BaseFeaturesExtractor] = FlattenExtractor,
    features_extractor_kwargs: Optional[Dict[str, Any]] = None,
    normalize_images: bool = True,
    optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
):
    """Derive the sub-spaces, prepare the network arguments and build.

    :param observation_space: Observation space passed to the parent
        constructor and combined with the action space for the Q network.
    :param action_space: Must be exactly a ``SimpleHybrid`` (subclasses
        fail the assertion).
    :param lr_schedule_q: Forwarded to ``self._build``.
    :param lr_schedule_parameter: Forwarded to ``self._build``.
    :param net_arch_q: Passed through ``self.get_net_arch`` for the Q net.
    :param net_arch_parameter: Passed through ``self.get_net_arch`` for
        the parameter net.
    :param activation_fn: Activation class stored in both argument dicts.
    :param features_extractor_class: Forwarded to the parent constructor
        and to ``self.get_net_arch``.
    :param features_extractor_kwargs: Forwarded to the parent constructor.
    :param normalize_images: Stored and placed in both argument dicts.
    :param optimizer_class: Forwarded to the parent constructor.
    :param optimizer_kwargs: Forwarded to the parent constructor.
    """
    assert type(action_space) is SimpleHybrid

    # These spaces are set before the parent constructor runs because
    # ``action_space_q`` is handed to it as the policy's action space.
    self.action_space_parameter = Box(
        action_space.continuous_low,
        action_space.continuous_high,
    )
    self.observation_space_q = build_state_parameter_space(
        observation_space, action_space)
    self.action_space_q = copy.copy(action_space[0])

    super(PDQNPolicy, self).__init__(
        observation_space,
        self.action_space_q,
        features_extractor_class,
        features_extractor_kwargs,
        optimizer_class=optimizer_class,
        optimizer_kwargs=optimizer_kwargs,
    )

    self.net_arch_q = self.get_net_arch(
        net_arch_q, features_extractor_class)
    self.net_arch_parameter = self.get_net_arch(
        net_arch_parameter, features_extractor_class)
    self.activation_fn = activation_fn
    self.normalize_images = normalize_images

    # Entries that are identical for both networks.
    common_net_args = {
        "activation_fn": self.activation_fn,
        "normalize_images": normalize_images,
    }
    self.net_args_q = {
        "observation_space": self.observation_space_q,
        "action_space": self.action_space_q,
        "net_arch": self.net_arch_q,
        **common_net_args,
    }
    self.net_args_parameter = {
        "observation_space": self.observation_space,
        "action_space": self.action_space_parameter,
        "net_arch": self.net_arch_parameter,
        **common_net_args,
    }

    # Placeholders until ``_build`` runs.
    self.q_net = None
    self.q_net_target = None
    self.parameter_net = None
    self._build(lr_schedule_q, lr_schedule_parameter)
class FakeImageEnv(Env):
    """
    Test-only environment that returns randomly sampled images, in the
    manner of an Atari game.

    :param action_dim: Number of discrete actions
    :param screen_height: Image height
    :param screen_width: Image width
    :param n_channels: Number of colour channels
    :param discrete: Use a discrete action space rather than a continuous one
    :param channel_first: Place the channel axis first rather than last
    """

    def __init__(
        self,
        action_dim: int = 6,
        screen_height: int = 84,
        screen_width: int = 84,
        n_channels: int = 1,
        discrete: bool = True,
        channel_first: bool = False,
    ):
        if channel_first:
            self.observation_shape = (n_channels, screen_height, screen_width)
        else:
            self.observation_shape = (screen_height, screen_width, n_channels)
        self.observation_space = Box(
            low=0, high=255, shape=self.observation_shape, dtype=np.uint8)

        # The continuous variant always has 5 dimensions; ``action_dim`` is
        # only used for the discrete variant.
        self.action_space = (
            Discrete(action_dim)
            if discrete
            else Box(low=-1, high=1, shape=(5,), dtype=np.float32)
        )
        self.ep_length = 10
        self.current_step = 0

    def reset(self) -> np.ndarray:
        """Reset the step counter and return a sampled observation."""
        self.current_step = 0
        return self.observation_space.sample()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        """Advance one step; the reward is always 0.0 and ``action`` is ignored.

        The episode is reported as done once ``ep_length`` steps have elapsed.
        """
        self.current_step += 1
        episode_over = self.current_step >= self.ep_length
        return self.observation_space.sample(), 0.0, episode_over, {}

    def render(self, mode: str = "human") -> None:
        """Do nothing."""
        pass
def __init__(
    self,
    action_dim: int = 6,
    screen_height: int = 84,
    screen_width: int = 84,
    n_channels: int = 1,
    discrete: bool = True,
    channel_first: bool = False,
):
    """Set up the image observation space, the action space and counters.

    :param action_dim: Number of actions of the discrete action space
    :param screen_height: Image height
    :param screen_width: Image width
    :param n_channels: Number of colour channels
    :param discrete: Use a discrete action space rather than a continuous one
    :param channel_first: Place the channel axis first rather than last
    """
    image_shape = (
        (n_channels, screen_height, screen_width)
        if channel_first
        else (screen_height, screen_width, n_channels)
    )
    self.observation_shape = image_shape
    self.observation_space = Box(
        low=0, high=255, shape=self.observation_shape, dtype=np.uint8)

    if not discrete:
        # The continuous space has a fixed size of 5 and ignores
        # ``action_dim``.
        self.action_space = Box(low=-1, high=1, shape=(5,), dtype=np.float32)
    else:
        self.action_space = Discrete(action_dim)

    self.ep_length = 10
    self.current_step = 0
def __init__(self):
    """Initialise the constants, the two spaces and the zeroed state."""
    # Fixed configuration values.
    self.max_move = np.float32(0.2)
    self.max_jump = 0.05
    self.n_obstacles = 3
    self.goal_position = 0.95
    self.obstacle_thickness = 0.01
    self.jump_threshold = 0.1
    self.max_timesteps = 30
    self.goal_threshold = 0.05

    # Hybrid action space made of two one-dimensional boxes: the first is
    # bounded by +/- ``max_move``, the second by [0, 1].
    first_component = Box(-self.max_move, self.max_move, (1, ))
    second_component = Box(np.float32(0), np.float32(1), (1, ))
    self.action_space = SimpleHybrid([first_component, second_component])

    # Seven observation entries, each bounded by [0, 1].
    obs_low = np.zeros(7, dtype=np.float32)
    obs_high = np.ones(7, dtype=np.float32)
    self.observation_space = Box(low=obs_low, high=obs_high)

    # Mutable state, all starting at zero.
    self.position = 0
    self.time = 0
    self.obstacle_position = np.zeros(self.n_obstacles)
    self.obstacle_target_height = np.zeros(self.n_obstacles)
def __init__(
    self,
    low: float = -1.0,
    high: float = 1.0,
    eps: float = 0.05,
    ep_length: int = 100,
):
    """
    Identity environment over a one-dimensional box, for testing purposes

    :param low: lower bound of the box dimension
    :param high: upper bound of the box dimension
    :param eps: epsilon bound for a correct value (stored as ``self.eps``)
    :param ep_length: number of timesteps in each episode
    """
    box_space = Box(low=low, high=high, shape=(1,), dtype=np.float32)
    super().__init__(space=box_space, ep_length=ep_length)
    self.eps = eps
class ActionDictTestEnv(gym.Env):
    """Environment with a ``Dict`` action space and a constant observation.

    Every call to ``step`` reports the episode as done with a reward of 1.
    """

    action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)})
    observation_space = Box(low=-1.0, high=2.0, shape=(3, ), dtype=np.float32)

    def step(self, action):
        """Return the constant observation, reward 1, done and an empty info."""
        return np.array([1.0, 1.5, 0.5]), 1, True, {}

    def reset(self):
        """Return the constant observation."""
        return np.array([1.0, 1.5, 0.5])

    def render(self, mode="human"):
        """Do nothing."""
        pass
def build_state_parameter_space(observation_space: Box,
                                action_space: SimpleHybrid) -> Box:
    """Return a ``Box`` spanning the observation and the continuous parameters.

    The bounds of ``observation_space`` are stacked with
    ``action_space.continuous_low`` / ``continuous_high`` using
    ``np.hstack``, observation bounds first.

    :param observation_space: Space supplying the leading bounds.
    :param action_space: Hybrid space supplying the trailing bounds.
    :return: ``Box`` built from the stacked lower and upper bounds.
    """
    return Box(
        np.hstack((observation_space.low, action_space.continuous_low)),
        np.hstack((observation_space.high, action_space.continuous_high)),
    )
def _build_observation_space(self) -> Box:
    """Return a ``Box`` bounded by +/- ``self.LIMIT``.

    The box has a single axis of length ``self.observation_dimension``.
    """
    lower = -self.LIMIT
    upper = self.LIMIT
    obs_shape = (self.observation_dimension,)
    return Box(lower, upper, shape=obs_shape)
def _build_parameter_spaces(self) -> List[Box]:
    """Return one ``Box`` per entry of ``self.parameter_dimensions``.

    Each box is bounded by +/- ``self.LIMIT`` and has a single axis whose
    length is the corresponding entry.
    """
    def box_of_size(size) -> Box:
        # ``LIMIT`` is read for every box, as in a plain comprehension.
        return Box(-self.LIMIT, self.LIMIT, shape=(size,))

    return [box_of_size(size) for size in self.parameter_dimensions]