Example #1
def test_action_spaces(simple_env: SimpleHybrid):
    schedule = get_linear_fn(1, 0.1, 0.2)
    observation_space = Box(low=-1, high=143, shape=(3,))
    action_space_false = OneHotHybrid([
        Box(low=-1, high=143, shape=(1,)),
        Box(low=1, high=1.2, shape=(2,)),
    ])

    with pytest.raises(AssertionError):
        PDQNMlpPolicy(observation_space, action_space_false, schedule,
                      schedule)
    with pytest.raises(AssertionError):
        MPDQNMlpPolicy(observation_space, action_space_false, schedule,
                       schedule)
Example #2
    def __init__(self, observation_space: spaces.Box, features_dim: int = 512):
        super(NatureCNN, self).__init__(observation_space, features_dim)
        # We assume CxHxW images (channels first)
        # Re-ordering will be done by pre-preprocessing or wrapper
        assert is_image_space(observation_space), (
            "You should use NatureCNN "
            f"only with images not with {observation_space}\n"
            "(you are probably using `CnnPolicy` instead of `MlpPolicy`)\n"
            "If you are using a custom environment,\n"
            "please check it using our env checker:\n"
            "https://stable-baselines3.readthedocs.io/en/master/common/env_checker.html"
        )
        n_input_channels = observation_space.shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4,
                      padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Compute shape by doing one forward pass
        with th.no_grad():
            n_flatten = self.cnn(
                th.as_tensor(
                    observation_space.sample()[None]).float()).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim),
                                    nn.ReLU())
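
For context, a minimal usage sketch of the extractor above. The channels-first 4x84x84 Atari-style shape is an illustrative assumption, and since the snippet only shows __init__, the sketch chains the two submodules directly:

import numpy as np
import torch as th
from gym.spaces import Box

# Channels-first image space; the (4, 84, 84) shape is assumed for illustration.
obs_space = Box(low=0, high=255, shape=(4, 84, 84), dtype=np.uint8)
extractor = NatureCNN(obs_space, features_dim=512)

batch = th.as_tensor(obs_space.sample()[None]).float()  # shape (1, 4, 84, 84)
features = extractor.linear(extractor.cnn(batch))       # shape (1, 512)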
Example #3
def make_box(low: Optional[List] = None,
             high: Optional[List] = None,
             shape: Optional[Tuple] = None) -> Box:
    if shape is None:
        if low is None or high is None:
            raise ValueError(
                "If shape is None, both low and high must be provided")
        low = np.array(low)
        high = np.array(high)
        return Box(low, high)
    else:
        if low is None:
            low = -np.inf
        if high is None:
            high = np.inf
        return Box(low, high, shape)
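
A few hedged call patterns for make_box (assuming numpy as np and gym.spaces.Box are in scope, as the snippet does):

make_box(low=[0.0, -1.0], high=[1.0, 1.0])  # shape (2,) inferred from the bounds
make_box(shape=(4,))                        # unbounded: [-inf, inf] per dimension
make_box(low=-1.0, high=1.0, shape=(2,))    # scalar bounds broadcast over the shape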
Example #4
    def __init__(
        self,
        observation_space: Space,
        action_space: SimpleHybrid,
        lr_schedule_q: Schedule,
        lr_schedule_parameter: Schedule,
        net_arch_q: Optional[List[int]] = None,
        net_arch_parameter: Optional[List[int]] = None,
        activation_fn: Type[nn.Module] = nn.ReLU,
        features_extractor_class: Type[
            BaseFeaturesExtractor] = FlattenExtractor,
        features_extractor_kwargs: Optional[Dict[str, Any]] = None,
        normalize_images: bool = True,
        optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
        optimizer_kwargs: Optional[Dict[str, Any]] = None,
    ):
        assert type(action_space) is SimpleHybrid

        self.action_space_parameter = Box(action_space.continuous_low,
                                          action_space.continuous_high)
        self.observation_space_q = build_state_parameter_space(
            observation_space, action_space)
        self.action_space_q = copy.copy(action_space[0])

        super(PDQNPolicy, self).__init__(
            observation_space,
            self.action_space_q,
            features_extractor_class,
            features_extractor_kwargs,
            optimizer_class=optimizer_class,
            optimizer_kwargs=optimizer_kwargs,
        )

        self.net_arch_q = self.get_net_arch(net_arch_q,
                                            features_extractor_class)
        self.net_arch_parameter = self.get_net_arch(net_arch_parameter,
                                                    features_extractor_class)

        self.activation_fn = activation_fn
        self.normalize_images = normalize_images

        self.net_args_q = {
            "observation_space": self.observation_space_q,
            "action_space": self.action_space_q,
            "net_arch": self.net_arch_q,
            "activation_fn": self.activation_fn,
            "normalize_images": normalize_images,
        }

        self.net_args_parameter = {
            "observation_space": self.observation_space,
            "action_space": self.action_space_parameter,
            "net_arch": self.net_arch_parameter,
            "activation_fn": self.activation_fn,
            "normalize_images": normalize_images,
        }

        self.q_net, self.q_net_target, self.parameter_net = None, None, None

        self._build(lr_schedule_q, lr_schedule_parameter)
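
A short sketch of how the spaces above are wired; only the space plumbing is taken from the snippet, what _build does with it is not shown here:

# parameter_net : observation_space   -> action_space_parameter
#                 (the continuous part of the hybrid action space)
# q_net, q_net_target : observation_space_q -> action_space_q,
#                 where observation_space_q stacks the raw observation with the
#                 continuous parameter bounds (see build_state_parameter_space
#                 in Example #9) and action_space_q is the first component of
#                 the hybrid action space.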
Example #5
class FakeImageEnv(Env):
    """
    Fake image environment for testing purposes, it mimics Atari games.

    :param action_dim: Number of discrete actions
    :param screen_height: Height of the image
    :param screen_width: Width of the image
    :param n_channels: Number of color channels
    :param discrete: Create discrete action space instead of continuous
    :param channel_first: Put channels on first axis instead of last
    """

    def __init__(
        self,
        action_dim: int = 6,
        screen_height: int = 84,
        screen_width: int = 84,
        n_channels: int = 1,
        discrete: bool = True,
        channel_first: bool = False,
    ):
        self.observation_shape = (screen_height, screen_width, n_channels)
        if channel_first:
            self.observation_shape = (n_channels, screen_height, screen_width)
        self.observation_space = Box(low=0, high=255, shape=self.observation_shape, dtype=np.uint8)
        if discrete:
            self.action_space = Discrete(action_dim)
        else:
            self.action_space = Box(low=-1, high=1, shape=(5,), dtype=np.float32)
        self.ep_length = 10
        self.current_step = 0

    def reset(self) -> np.ndarray:
        self.current_step = 0
        return self.observation_space.sample()

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        reward = 0.0
        self.current_step += 1
        done = self.current_step >= self.ep_length
        return self.observation_space.sample(), reward, done, {}

    def render(self, mode: str = "human") -> None:
        pass
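
A minimal rollout sketch using only the API defined above:

env = FakeImageEnv(screen_height=84, screen_width=84, n_channels=1, channel_first=True)
obs = env.reset()  # uint8 array of shape (1, 84, 84)
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
# Episodes always terminate after env.ep_length (10) steps.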
Example #6
    def __init__(self):
        self.max_move = np.float32(0.2)
        self.max_jump = 0.05
        self.n_obstacles = 3
        self.goal_position = 0.95
        self.obstacle_thickness = 0.01
        self.jump_threshold = 0.1
        self.max_timesteps = 30
        self.goal_threshold = 0.05
        self.action_space = SimpleHybrid([
            Box(-self.max_move, self.max_move, (1,)),
            Box(np.float32(0), np.float32(1), (1,))
        ])
        self.observation_space = Box(low=np.zeros(7, dtype=np.float32),
                                     high=np.ones(7, dtype=np.float32))

        self.position = 0
        self.time = 0
        self.obstacle_position = np.zeros(self.n_obstacles)
        self.obstacle_target_height = np.zeros(self.n_obstacles)
Example #7
    def __init__(self, low: float = -1.0, high: float = 1.0, eps: float = 0.05, ep_length: int = 100):
        """
        Identity environment for testing purposes

        :param low: the lower bound of the box dim
        :param high: the upper bound of the box dim
        :param eps: the epsilon bound for correct value
        :param ep_length: the length of each episode in timesteps
        """
        space = Box(low=low, high=high, shape=(1,), dtype=np.float32)
        super().__init__(ep_length=ep_length, space=space)
        self.eps = eps
Example #8
class ActionDictTestEnv(gym.Env):
    action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)})
    observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)

    def step(self, action):
        observation = np.array([1.0, 1.5, 0.5])
        reward = 1
        done = True
        info = {}
        return observation, reward, done, info

    def reset(self):
        return np.array([1.0, 1.5, 0.5])

    def render(self, mode="human"):
        pass
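
A short interaction sketch; gym's Dict.sample returns a dict keyed by the subspace names (here both subspaces are Discrete(1), so every sample is 0):

env = ActionDictTestEnv()
obs = env.reset()                    # array([1.0, 1.5, 0.5])
action = env.action_space.sample()   # OrderedDict([('position', 0), ('velocity', 0)])
obs, reward, done, info = env.step(action)
assert done  # this test env terminates after a single step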
Example #9
def build_state_parameter_space(observation_space: Box,
                                action_space: SimpleHybrid) -> Box:
    lows = np.hstack([observation_space.low, action_space.continuous_low])
    highs = np.hstack([observation_space.high, action_space.continuous_high])

    return Box(lows, highs)
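
A worked example of the concatenation; the concrete bounds are illustrative, and only continuous_low/continuous_high of the action space are used:

# Suppose:
#   observation_space.low  == [0, 0, 0],  observation_space.high == [1, 1, 1]
#   action_space.continuous_low  == [-0.2, 0.0]
#   action_space.continuous_high == [ 0.2, 1.0]
# Then np.hstack yields:
#   lows  == [0, 0, 0, -0.2, 0.0]
#   highs == [1, 1, 1,  0.2, 1.0]
# i.e. a 5-dimensional Box stacking state and parameter bounds; this is the
# Q-network input space used in Example #4.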
Example #10
    def _build_observation_space(self) -> Box:
        return Box(-self.LIMIT, self.LIMIT, shape=(self.observation_dimension,))
Example #11
    def _build_parameter_spaces(self) -> List[Box]:
        return [Box(-self.LIMIT, self.LIMIT, shape=(dimension,))
                for dimension in self.parameter_dimensions]