def __init__(
    self,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
    n_critics: int = 2,
    share_features_extractor: bool = True,
):
    """
    Create ``n_critics`` separate networks and register each one as a submodule.

    Every network is produced by ``create_mlp`` with an input size of
    ``features_dim + action_dim`` and an output size of 1. It is registered
    under the name ``qf<idx>`` and also appended to ``self.q_networks``.

    :param observation_space: forwarded to the parent constructor
    :param action_space: forwarded to the parent constructor; its action
        dimension (via ``get_action_dim``) sizes the network input
    :param net_arch: architecture description handed to ``create_mlp``
    :param features_extractor: forwarded to the parent constructor
    :param features_dim: number of features, added to the action dimension
        to obtain the network input size
    :param activation_fn: activation class handed to ``create_mlp``
    :param normalize_images: forwarded to the parent constructor
    :param n_critics: how many networks to build
    :param share_features_extractor: stored on the instance; not otherwise
        used in this constructor
    """
    super().__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
    )

    # The input width is the same for every network, so compute it once.
    q_input_dim = features_dim + get_action_dim(self.action_space)

    self.share_features_extractor = share_features_extractor
    self.n_critics = n_critics

    # Plain Python list for direct iteration; parameter registration is
    # handled by the explicit ``add_module`` call below.
    self.q_networks = []
    for critic_idx in range(n_critics):
        layers = create_mlp(q_input_dim, 1, net_arch, activation_fn)
        critic = nn.Sequential(*layers)
        self.add_module(f"qf{critic_idx}", critic)
        self.q_networks.append(critic)
def make_proba_distribution(
    action_space: spaces.Space,
    use_sde: bool = False,
    dist_kwargs: Optional[Dict[str, Any]] = None,
) -> Distribution:
    """
    Return an instance of Distribution for the correct type of action space

    :param action_space: the input action space
    :param use_sde: Force the use of StateDependentNoiseDistribution
        instead of DiagGaussianDistribution (only consulted for ``Box`` spaces)
    :param dist_kwargs: Keyword arguments to pass to the probability distribution.
        They are not forwarded when ``action_space`` is a ``SimpleHybrid``.
    :return: the appropriate Distribution object
    :raises NotImplementedError: if the action space type is not handled
    """
    if dist_kwargs is None:
        dist_kwargs = {}

    if isinstance(action_space, spaces.Box):
        assert len(action_space.shape) == 1, "Error: the action space must be a vector"
        cls = StateDependentNoiseDistribution if use_sde else DiagGaussianDistribution
        return cls(get_action_dim(action_space), **dist_kwargs)
    elif isinstance(action_space, spaces.Discrete):
        return CategoricalDistribution(action_space.n, **dist_kwargs)
    elif isinstance(action_space, spaces.MultiDiscrete):
        return MultiCategoricalDistribution(action_space.nvec, **dist_kwargs)
    elif isinstance(action_space, spaces.MultiBinary):
        return BernoulliDistribution(action_space.n, **dist_kwargs)
    elif isinstance(action_space, SimpleHybrid):
        # NOTE(review): dist_kwargs is intentionally left out here to keep the
        # existing behavior; confirm whether HybridDistribution should accept it.
        return HybridDistribution(action_space)
    else:
        # The trailing space after "action space" matters: the adjacent
        # literals are concatenated and previously produced "spaceof".
        raise NotImplementedError(
            "Error: probability distribution, not implemented for action space "
            f"of type {type(action_space)}."
            " Must be of type Gym Spaces: Box, Discrete, MultiDiscrete or MultiBinary,"
            " or SimpleHybrid."
        )
def __init__(
    self,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    normalize_images: bool = True,
):
    """
    Set up the deterministic action network ``self.mu``.

    The network is built by ``create_mlp`` from ``features_dim`` inputs to
    ``action_dim`` outputs with ``squash_output=True``, then wrapped in an
    ``nn.Sequential``.

    :param observation_space: forwarded to the parent constructor
    :param action_space: forwarded to the parent constructor; its action
        dimension (via ``get_action_dim``) is the network output size
    :param net_arch: architecture description handed to ``create_mlp``;
        also stored on the instance
    :param features_extractor: forwarded to the parent constructor
    :param features_dim: network input size; also stored on the instance
    :param activation_fn: activation class handed to ``create_mlp``;
        also stored on the instance
    :param normalize_images: forwarded to the parent constructor
    """
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )

    # Keep the constructor arguments available on the instance.
    self.activation_fn = activation_fn
    self.features_dim = features_dim
    self.net_arch = net_arch

    # Deterministic action
    n_actions = get_action_dim(self.action_space)
    mu_layers = create_mlp(
        features_dim,
        n_actions,
        net_arch,
        activation_fn,
        squash_output=True,
    )
    self.mu = nn.Sequential(*mu_layers)
def __init__(
    self,
    buffer_size: int,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    device: Union[th.device, str] = "cpu",
    n_envs: int = 1,
):
    """
    Store the buffer configuration and initialise the bookkeeping state.

    :param buffer_size: stored as ``self.buffer_size``
    :param observation_space: stored on the instance; ``self.obs_shape`` is
        derived from it with ``get_obs_shape``
    :param action_space: stored on the instance; ``self.action_dim`` is
        derived from it with ``get_action_dim``
    :param device: stored unchanged as ``self.device`` (no conversion is
        performed here)
    :param n_envs: stored as ``self.n_envs``
    """
    super(BaseBuffer, self).__init__()

    # Spaces and the dimensions derived from them.
    self.observation_space = observation_space
    self.obs_shape = get_obs_shape(observation_space)
    self.action_space = action_space
    self.action_dim = get_action_dim(action_space)

    # Settings kept exactly as they were passed in.
    self.buffer_size = buffer_size
    self.n_envs = n_envs
    self.device = device

    # Bookkeeping state: start at position 0 with the "full" flag cleared.
    self.pos = 0
    self.full = False
def __init__(
    self,
    observation_space: spaces.Space,
    action_space: spaces.Space,
    net_arch: List[int],
    features_extractor: nn.Module,
    features_dim: int,
    activation_fn: Type[nn.Module] = nn.ReLU,
    use_sde: bool = False,
    log_std_init: float = -3,
    full_std: bool = True,
    sde_net_arch: Optional[List[int]] = None,
    use_expln: bool = False,
    clip_mean: float = 2.0,
    normalize_images: bool = True,
):
    """
    Build the latent network plus the ``mu`` / ``log_std`` heads of the actor.

    With ``use_sde`` the heads come from a ``StateDependentNoiseDistribution``;
    otherwise they are two ``nn.Linear`` layers paired with a
    ``SquashedDiagGaussianDistribution``.

    :param observation_space: forwarded to the parent constructor
    :param action_space: forwarded to the parent constructor; its action
        dimension (via ``get_action_dim``) sizes the distribution and heads
    :param net_arch: architecture description handed to ``create_mlp`` for the
        latent network; its last entry (or ``features_dim`` when empty) is the
        input size of the heads
    :param features_extractor: forwarded to the parent constructor
    :param features_dim: input size of the latent network
    :param activation_fn: activation class handed to ``create_mlp`` and, when
        applicable, to ``create_sde_features_extractor``
    :param use_sde: select the state-dependent-noise branch
    :param log_std_init: passed to ``proba_distribution_net`` in the
        ``use_sde`` branch; only stored otherwise
    :param full_std: passed to ``StateDependentNoiseDistribution``
    :param sde_net_arch: when not ``None`` (and ``use_sde`` is set), a separate
        features extractor for gSDE is created from it
    :param use_expln: passed to ``StateDependentNoiseDistribution``
    :param clip_mean: in the ``use_sde`` branch, if > 0 the output of ``mu`` is
        limited to ``[-clip_mean, clip_mean]`` with ``nn.Hardtanh``
    :param normalize_images: forwarded to the parent constructor
    """
    super(Actor, self).__init__(
        observation_space,
        action_space,
        features_extractor=features_extractor,
        normalize_images=normalize_images,
        squash_output=True,
    )

    # Save arguments to re-create object at loading
    self.use_sde = use_sde
    self.sde_features_extractor = None
    self.sde_net_arch = sde_net_arch
    self.net_arch = net_arch
    self.features_dim = features_dim
    self.activation_fn = activation_fn
    self.log_std_init = log_std_init
    self.use_expln = use_expln
    self.full_std = full_std
    self.clip_mean = clip_mean

    action_dim = get_action_dim(self.action_space)
    # NOTE(review): an output dim of -1 presumably tells create_mlp not to
    # append an output layer -- confirm against create_mlp.
    latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
    self.latent_pi = nn.Sequential(*latent_pi_net)
    # With an empty net_arch the heads are sized from features_dim instead.
    last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

    if self.use_sde:
        latent_sde_dim = last_layer_dim
        # Separate features extractor for gSDE
        if sde_net_arch is not None:
            self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(
                features_dim, sde_net_arch, activation_fn
            )

        self.action_dist = StateDependentNoiseDistribution(
            action_dim,
            full_std=full_std,
            use_expln=use_expln,
            learn_features=True,
            squash_output=True,
        )
        self.mu, self.log_std = self.action_dist.proba_distribution_net(
            latent_dim=last_layer_dim,
            latent_sde_dim=latent_sde_dim,
            log_std_init=log_std_init,
        )
        # Avoid numerical issues by limiting the mean of the Gaussian
        # to be in [-clip_mean, clip_mean]
        if clip_mean > 0.0:
            self.mu = nn.Sequential(
                self.mu,
                nn.Hardtanh(min_val=-clip_mean, max_val=clip_mean),
            )
    else:
        self.action_dist = SquashedDiagGaussianDistribution(action_dim)
        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)