def test_action_spaces(simple_env: SimpleHybrid):
    """Both MLP policy classes must refuse a ``OneHotHybrid`` action space.

    Each policy is constructed with an action space built from
    ``OneHotHybrid`` and the construction is expected to fail with an
    ``AssertionError``. The ``simple_env`` fixture is requested but not
    used directly in the body.
    """
    lr_schedule = get_linear_fn(1, 0.1, 0.2)
    obs_space = Box(low=-1, high=143, shape=(3, ))
    parameter_boxes = [
        Box(low=-1, high=143, shape=(1, )),
        Box(low=1, high=1.2, shape=(2, )),
    ]
    invalid_action_space = OneHotHybrid(parameter_boxes)

    # Same check for every policy class, in the original order.
    for policy_class in (PDQNMlpPolicy, MPDQNMlpPolicy):
        with pytest.raises(AssertionError):
            policy_class(obs_space, invalid_action_space,
                         lr_schedule, lr_schedule)
def make_box(low: Optional[List] = None,
             high: Optional[List] = None,
             shape: Optional[Tuple] = None) -> Box:
    """
    Build a ``Box`` space from optional bounds and an optional shape.

    :param low: lower bound(s); ``None`` means unbounded below (``-inf``)
    :param high: upper bound(s); ``None`` means unbounded above (``+inf``)
    :param shape: explicit shape of the space. When given, the bounds are
        passed to ``Box`` unchanged together with this shape. When ``None``,
        the shape is taken from whichever bound was supplied.
    :return: the constructed ``Box``
    :raises ValueError: if ``shape``, ``low`` and ``high`` are all ``None``,
        since the shape of the space cannot be determined
    """
    if shape is not None:
        if low is None:
            low = -np.inf
        if high is None:
            high = np.inf
        return Box(low, high, shape)

    if low is None and high is None:
        raise ValueError("Some value needs to be not none")

    # Convert only the bounds that were actually supplied: np.array(None)
    # yields a 0-d object array, which is not a usable bound.
    low_bound = None if low is None else np.array(low)
    high_bound = None if high is None else np.array(high)

    # A single missing bound becomes +/-inf with the shape of the other one,
    # mirroring the defaults used when an explicit shape is given.
    if low_bound is None:
        low_bound = np.full(high_bound.shape, -np.inf)
    if high_bound is None:
        high_bound = np.full(low_bound.shape, np.inf)
    return Box(low_bound, high_bound)
def __init__(
    self,
    observation_space: Space,
    action_space: SimpleHybrid,
    lr_schedule_q: Schedule,
    lr_schedule_parameter: Schedule,
    net_arch_q: Optional[List[int]] = None,
    net_arch_parameter: Optional[List[int]] = None,
    activation_fn: Type[nn.Module] = nn.ReLU,
    features_extractor_class: Type[
        BaseFeaturesExtractor] = FlattenExtractor,
    features_extractor_kwargs: Optional[Dict[str, Any]] = None,
    normalize_images: bool = True,
    optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam,
    optimizer_kwargs: Optional[Dict[str, Any]] = None,
):
    """
    Set up the spaces and constructor arguments for the Q-network and the
    parameter network, then build them via ``self._build``.

    :param observation_space: observation space handed to the base class
    :param action_space: hybrid action space; must be exactly a
        ``SimpleHybrid`` instance (checked with an ``assert``)
    :param lr_schedule_q: schedule forwarded to ``self._build`` (first arg)
    :param lr_schedule_parameter: schedule forwarded to ``self._build``
        (second arg)
    :param net_arch_q: architecture spec resolved through
        ``self.get_net_arch`` for the Q-network
    :param net_arch_parameter: architecture spec resolved through
        ``self.get_net_arch`` for the parameter network
    :param activation_fn: activation module class stored for both networks
    :param features_extractor_class: forwarded to the base class and to
        ``self.get_net_arch``
    :param features_extractor_kwargs: forwarded to the base class
    :param normalize_images: stored and placed in both network arg dicts
    :param optimizer_class: forwarded to the base class
    :param optimizer_kwargs: forwarded to the base class
    :raises AssertionError: if ``type(action_space)`` is not ``SimpleHybrid``
    """
    # Exact type check (not isinstance): any other type, including a
    # subclass, fails here.
    assert type(action_space) is SimpleHybrid

    # Box spanning only the continuous bounds of the hybrid action space.
    self.action_space_parameter = Box(action_space.continuous_low,
                                      action_space.continuous_high)
    # Observation bounds extended with the continuous action bounds.
    self.observation_space_q = build_state_parameter_space(
        observation_space, action_space)
    # Shallow copy of the first sub-space
    # (presumably the discrete component - TODO confirm).
    self.action_space_q = copy.copy(action_space[0])

    # NOTE: the attributes above are assigned before the base-class
    # initialiser runs; keep this ordering when editing.
    super(PDQNPolicy, self).__init__(
        observation_space,
        self.action_space_q,
        features_extractor_class,
        features_extractor_kwargs,
        optimizer_class=optimizer_class,
        optimizer_kwargs=optimizer_kwargs,
    )

    self.net_arch_q = self.get_net_arch(net_arch_q,
                                        features_extractor_class)
    self.net_arch_parameter = self.get_net_arch(net_arch_parameter,
                                                features_extractor_class)

    self.activation_fn = activation_fn
    self.normalize_images = normalize_images

    # Keyword arguments kept for constructing the Q-network.
    self.net_args_q = {
        "observation_space": self.observation_space_q,
        "action_space": self.action_space_q,
        "net_arch": self.net_arch_q,
        "activation_fn": self.activation_fn,
        "normalize_images": normalize_images,
    }
    # Keyword arguments kept for constructing the parameter network.
    # self.observation_space is presumably set by the base-class
    # initialiser - verify against the parent class.
    self.net_args_parameter = {
        "observation_space": self.observation_space,
        "action_space": self.action_space_parameter,
        "net_arch": self.net_arch_parameter,
        "activation_fn": self.activation_fn,
        "normalize_images": normalize_images,
    }

    # Placeholders; expected to be populated by self._build (not shown here).
    self.q_net, self.q_net_target, self.parameter_net = None, None, None
    self._build(lr_schedule_q, lr_schedule_parameter)
def __init__(
    self,
    action_dim: int = 6,
    screen_height: int = 84,
    screen_width: int = 84,
    n_channels: int = 1,
    discrete: bool = True,
    channel_first: bool = False,
):
    """
    Configure the observation and action spaces of the environment.

    :param action_dim: number of actions of the ``Discrete`` action space
        (only used when ``discrete`` is true)
    :param screen_height: height component of the observation shape
    :param screen_width: width component of the observation shape
    :param n_channels: channel component of the observation shape
    :param discrete: use a ``Discrete`` action space when true, otherwise a
        ``Box`` in [-1, 1] with fixed shape ``(5,)``
    :param channel_first: put the channel axis first in the observation
        shape instead of last
    """
    if channel_first:
        obs_shape = (n_channels, screen_height, screen_width)
    else:
        obs_shape = (screen_height, screen_width, n_channels)
    self.observation_shape = obs_shape
    self.observation_space = Box(
        low=0, high=255, shape=self.observation_shape, dtype=np.uint8
    )

    # The continuous variant keeps its hard-coded 5-dimensional shape.
    self.action_space = (
        Discrete(action_dim)
        if discrete
        else Box(low=-1, high=1, shape=(5,), dtype=np.float32)
    )

    self.ep_length = 10
    self.current_step = 0
def __init__(self):
    """Initialise the fixed constants, the spaces and the starting state."""
    # Fixed configuration values.
    move_limit = np.float32(0.2)
    self.max_move = move_limit
    self.max_jump = 0.05
    self.n_obstacles = 3
    self.goal_position = 0.95
    self.obstacle_thickness = 0.01
    self.jump_threshold = 0.1
    self.max_timesteps = 30
    self.goal_threshold = 0.05

    # Hybrid action space made of two one-dimensional boxes:
    # [-max_move, max_move] and [0, 1].
    move_box = Box(-move_limit, move_limit, (1, ))
    unit_box = Box(np.float32(0), np.float32(1), (1, ))
    self.action_space = SimpleHybrid([move_box, unit_box])

    # Seven-dimensional observation bounded to [0, 1].
    obs_low = np.zeros(7, dtype=np.float32)
    obs_high = np.ones(7, dtype=np.float32)
    self.observation_space = Box(low=obs_low, high=obs_high)

    # Mutable state, all starting from zero.
    self.position = 0
    self.time = 0
    self.obstacle_position = np.zeros(self.n_obstacles)
    self.obstacle_target_height = np.zeros(self.n_obstacles)
def __init__(self, low: float = -1.0, high: float = 1.0,
             eps: float = 0.05, ep_length: int = 100):
    """
    Identity test environment over a one-dimensional ``Box`` space.

    :param low: lower bound of the single box dimension
    :param high: upper bound of the single box dimension
    :param eps: epsilon tolerance stored on the instance as ``self.eps``
    :param ep_length: episode length in timesteps, forwarded to the parent
    """
    box_space = Box(low=low, high=high, shape=(1,), dtype=np.float32)
    super().__init__(space=box_space, ep_length=ep_length)
    self.eps = eps
class ActionDictTestEnv(gym.Env):
    """
    Minimal one-step environment whose action space is a ``Dict``.

    Every call to ``reset`` and ``step`` returns the same fixed
    observation; ``step`` always reports a reward of 1 and a finished
    episode.
    """

    action_space = Dict({"position": Discrete(1), "velocity": Discrete(1)})
    observation_space = Box(low=-1.0, high=2.0, shape=(3, ), dtype=np.float32)

    def _fixed_observation(self):
        """Return the constant observation in the observation space's dtype."""
        # Build the array with the declared dtype (float32) instead of numpy's
        # float64 default so the returned value matches observation_space.
        return np.array([1.0, 1.5, 0.5], dtype=self.observation_space.dtype)

    def step(self, action):
        """
        Ignore ``action`` and end the episode immediately.

        :param action: unused
        :return: ``(observation, reward, done, info)`` with reward ``1``,
            ``done=True`` and an empty ``info`` dict
        """
        observation = self._fixed_observation()
        reward = 1
        done = True
        info = {}
        return observation, reward, done, info

    def reset(self):
        """Return the constant initial observation."""
        return self._fixed_observation()

    def render(self, mode="human"):
        """Do nothing; rendering is not implemented for this environment."""
        pass
def build_state_parameter_space(observation_space: Box,
                                action_space: SimpleHybrid) -> Box:
    """
    Create a ``Box`` whose bounds are the observation bounds followed by the
    continuous action bounds.

    :param observation_space: space providing ``low`` and ``high``
    :param action_space: hybrid space providing ``continuous_low`` and
        ``continuous_high``
    :return: ``Box`` built from the horizontally stacked lower and upper
        bounds
    """
    obs_low, param_low = observation_space.low, action_space.continuous_low
    obs_high, param_high = observation_space.high, action_space.continuous_high

    stacked_low = np.hstack((obs_low, param_low))
    stacked_high = np.hstack((obs_high, param_high))
    return Box(low=stacked_low, high=stacked_high)
def _build_observation_space(self) -> Box:
    """Return a ``Box`` bounded by ``±self.LIMIT`` with one axis of
    length ``self.observation_dimension``."""
    limit = self.LIMIT
    obs_shape = (self.observation_dimension,)
    return Box(low=-limit, high=limit, shape=obs_shape)
def _build_parameter_spaces(self) -> List[Box]:
    """Return one ``Box`` bounded by ``±self.LIMIT`` for every entry of
    ``self.parameter_dimensions``, in the same order."""
    parameter_spaces = []
    for size in self.parameter_dimensions:
        space = Box(-self.LIMIT, self.LIMIT, shape=(size,))
        parameter_spaces.append(space)
    return parameter_spaces