def __init__(self, num_inputs, num_outputs, use_state_dependent_std):
    """Create the mean head and the log-std term of this actor layer.

    Args:
        num_inputs: ``in_features`` of the linear head(s).
        num_outputs: ``out_features`` of the linear head(s); also the
            length of the free ``logstd`` vector.
        use_state_dependent_std: when truthy, a second linear head
            ``actor_logstd`` is created; otherwise a learnable
            ``logstd`` parameter filled with zeros is created instead.
    """
    super().__init__()

    def zero_fill(tensor):
        # Passed to the project's ``init`` helper as its third argument
        # (presumably the bias initialiser -- confirm against ``init``).
        return nn.init.constant_(tensor, 0)

    self.actor_mean = nn.Linear(num_inputs, num_outputs)
    init(self.actor_mean, nn.init.orthogonal_, zero_fill)

    self.use_state_dependent_std = use_state_dependent_std
    if not use_state_dependent_std:
        # State-independent case: one learnable value per output.
        self.logstd = nn.Parameter(torch.zeros(num_outputs),
                                   requires_grad=True)
        return

    self.actor_logstd = nn.Linear(num_inputs, num_outputs)
    init(self.actor_logstd, nn.init.orthogonal_, zero_fill)
def __init__(self, num_inputs, num_outputs, state_dependent_std, init_w=1e-3):
    """Create the mean head and the log-std term of this actor layer.

    Args:
        num_inputs: ``in_features`` of the linear head(s).
        num_outputs: ``out_features`` of the linear head(s); also the
            length of the free ``logstd`` vector.
        state_dependent_std: when truthy, a second linear head
            ``actor_logstd`` is created; otherwise a learnable
            ``logstd`` parameter filled with zeros is created instead.
        init_w: half-width of the uniform range ``[-init_w, init_w]``
            handed to the project's ``init`` helper for both of its
            initialiser arguments.
    """
    super(TanhGaussainActorLayer, self).__init__()

    def small_uniform(tensor):
        return nn.init.uniform_(tensor, -init_w, init_w)

    self.actor_mean = nn.Linear(num_inputs, num_outputs)
    init(self.actor_mean, small_uniform, small_uniform)

    self.state_dependent_std = state_dependent_std
    if self.state_dependent_std:
        self.actor_logstd = nn.Linear(num_inputs, num_outputs)
        # Fix: the previous code re-initialised ``actor_mean`` here,
        # leaving ``actor_logstd`` with nn.Linear's default init.
        init(self.actor_logstd, small_uniform, small_uniform)
    else:
        # State-independent case: one learnable value per output.
        self.logstd = nn.Parameter(torch.zeros(num_outputs),
                                   requires_grad=True)
def __init__(self, dim_state: int, hidden_dims: List[int], state_normalizer=None, activation='Tanh'):
    """Build the critic MLP and attach a state normalizer.

    Args:
        dim_state: first positional argument given to ``MLP``
            (presumably the input width -- confirm against ``MLP``).
        hidden_dims: third positional argument given to ``MLP``.
        state_normalizer: module stored as ``self.normalizer``; a falsy
            value (e.g. ``None``) is replaced by ``nn.Identity()``.
        activation: forwarded to ``MLP`` as its ``activation`` keyword.
    """
    super().__init__()

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    def init_layer(module):
        return init(module, normc_init, zero_fill)

    self.critic = MLP(dim_state, 1, hidden_dims, activation=activation)
    self.normalizer = state_normalizer if state_normalizer else nn.Identity()

    # The same initialiser callable is supplied for both arguments of
    # ``MLP.init``.
    self.critic.init(init_layer, init_layer)
def __init__(self, state_dim: int, action_dim: int, hidden_dims: List[int], normalizer: Normalizers):
    """Build the ``diff_dynamics`` MLP and record the dimensions.

    Args:
        state_dim: stored as ``self.dim_state``; also the second
            positional argument given to ``MLP``.
        action_dim: stored as ``self.dim_action``.
        hidden_dims: third positional argument given to ``MLP``.
        normalizer: stored unchanged as ``self.normalizer``.
    """
    super().__init__()

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    def init_layer(module):
        return init(module, truncated_norm_init, zero_fill)

    self.dim_state = state_dim
    self.dim_action = action_dim
    self.normalizer = normalizer

    # First MLP argument is the combined size of state and action.
    joint_dim = state_dim + action_dim
    self.diff_dynamics = MLP(joint_dim, state_dim, hidden_dims,
                             activation='ReLU')
    self.diff_dynamics.init(init_layer, init_layer)
def __init__(self, state_dim: int, action_space, hidden_dims: List[int], state_normalizer: Optional[nn.Module], use_limited_entropy=False):
    """Build the feature MLP and an output layer chosen by action space.

    Args:
        state_dim: first positional argument given to the feature ``MLP``.
        action_space: object whose class name selects the output layer:
            ``"Discrete"`` (reads ``.n``), ``"Box"`` (reads
            ``.shape[0]``) or ``"MultiBinary"`` (reads ``.shape[0]``).
        hidden_dims: ``hidden_dims[:-1]`` and ``hidden_dims[-1]`` are
            passed to the feature ``MLP``; ``hidden_dims[-1]`` is also
            the input size of the output layer.
        state_normalizer: module stored as ``self.state_normalizer``; a
            falsy value (e.g. ``None``) is replaced by ``nn.Identity()``.
        use_limited_entropy: for ``"Box"`` spaces, selects
            ``LimitedEntGaussianActorLayer`` over ``GaussianActorLayer``.

    Raises:
        NotImplementedError: if the action space class name is not one
            of the three supported names.
    """
    super(Actor, self).__init__()
    self.state_dim = state_dim
    # NOTE: despite its name, this attribute holds the action space
    # object itself; kept as-is so existing readers keep working.
    self.action_dim = action_space
    self.hidden_dims = hidden_dims

    feature_dim = hidden_dims[-1]
    self.actor_feature = MLP(state_dim, feature_dim, hidden_dims[:-1],
                             activation='Tanh', last_activation='Tanh')
    self.state_normalizer = state_normalizer or nn.Identity()

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        action_dim = action_space.n
        self.actor = CategoricalActorLayer(feature_dim, action_dim)
    elif space_kind == "Box":
        action_dim = action_space.shape[0]
        if use_limited_entropy:
            self.actor = LimitedEntGaussianActorLayer(
                feature_dim, action_dim, use_state_dependent_std=False)
        else:
            self.actor = GaussianActorLayer(
                feature_dim, action_dim, use_state_dependent_std=False)
    elif space_kind == "MultiBinary":
        action_dim = action_space.shape[0]
        self.actor = BernoulliActorLayer(feature_dim, action_dim)
    else:
        # Fix: ``raise NotImplemented`` raised a non-exception constant,
        # which surfaced as an unrelated TypeError.
        raise NotImplementedError(
            f"Unsupported action space: {space_kind}")

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    def init_layer(module):
        return init(module, normc_init, zero_fill)

    # Only the feature MLP is initialised here; the output layer is left
    # as its own constructor built it.
    self.actor_feature.init(init_layer, init_layer)
def __init__(self, dim_state, action_space, actor_hidden_dims: List[int], critic_hidden_dims: List[int], normalizer: nn.Module = None):
    """Build the actor feature MLP, the critic MLP and the actor head.

    Args:
        dim_state: first positional argument given to both ``MLP``s.
        action_space: object whose class name selects the actor head:
            ``"Discrete"`` (reads ``.n``), ``"Box"`` (reads
            ``.shape[0]``) or ``"MultiBinary"`` (reads ``.shape[0]``).
            For any other class name no ``self.actor`` is created.
        actor_hidden_dims: ``[:-1]`` and ``[-1]`` are passed to the
            feature ``MLP``; ``[-1]`` is also the actor head input size.
        critic_hidden_dims: third positional argument of the critic
            ``MLP``.
        normalizer: module stored as ``self.normalizer``; a falsy value
            (e.g. ``None``) is replaced by ``nn.Identity()``.
    """
    super().__init__()

    def scaled_orthogonal(tensor):
        return nn.init.orthogonal_(tensor, np.sqrt(2))

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    def init_layer(module):
        return init(module, scaled_orthogonal, zero_fill)

    feature_dim = actor_hidden_dims[-1]
    self.actor_feature = MLP(dim_state, feature_dim, actor_hidden_dims[:-1],
                             activation='Tanh', last_activation='Tanh')
    self.critic = MLP(dim_state, 1, critic_hidden_dims,
                      activation='Tanh', last_activation='Identity')
    self.normalizer = normalizer if normalizer else nn.Identity()

    self.actor_feature.init(init_layer, init_layer)
    self.critic.init(init_layer, init_layer)
    self.train()

    space_kind = action_space.__class__.__name__
    if space_kind == "Discrete":
        self.actor = CategoricalActorLayer(feature_dim, action_space.n)
    elif space_kind == "Box":
        self.actor = GaussianActorLayer(feature_dim, action_space.shape[0],
                                        use_state_dependent_std=False)
    elif space_kind == "MultiBinary":
        self.actor = BernoulliActorLayer(feature_dim, action_space.shape[0])
def __init__(self, num_inputs, num_outputs):
    """Create the single linear head ``self.actor``.

    Args:
        num_inputs: ``in_features`` of the linear head.
        num_outputs: ``out_features`` of the linear head.
    """
    super().__init__()

    def small_orthogonal(tensor):
        # Orthogonal init with a gain of 0.01.
        return nn.init.orthogonal_(tensor, gain=0.01)

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    head = nn.Linear(num_inputs, num_outputs)
    self.actor = head
    init(head, small_orthogonal, zero_fill)
def __init__(self, num_inputs, num_outputs):
    """Create the single linear head ``self.actor``.

    Args:
        num_inputs: ``in_features`` of the linear head.
        num_outputs: ``out_features`` of the linear head.
    """
    super().__init__()

    def zero_fill(tensor):
        return nn.init.constant_(tensor, 0)

    head = nn.Linear(num_inputs, num_outputs)
    self.actor = head
    # ``nn.init.orthogonal_`` is passed with its default gain.
    init(head, nn.init.orthogonal_, zero_fill)