class Dynamics(nn.Module):
    """Learned forward model predicting the next state from (state, action).

    The MLP regresses the *normalized* state difference; the prediction is
    reconstructed as ``state + inverse_normalized_diff`` and then clipped in
    normalized state space so model rollouts stay bounded.
    """

    def __init__(self, state_dim: int, action_dim: int,
                 hidden_dims: List[int], normalizer: Normalizers):
        super(Dynamics, self).__init__()
        self.dim_state = state_dim
        self.dim_action = action_dim
        self.normalizer = normalizer
        self.diff_dynamics = MLP(state_dim + action_dim, state_dim,
                                 hidden_dims, activation='ReLU')

        def weight_init(module):
            # Truncated-normal weights, zero biases.
            return init(module, truncated_norm_init,
                        lambda bias: nn.init.constant_(bias, 0))

        self.diff_dynamics.init(weight_init, weight_init)

    def forward(self, state, action):
        # Action clip is the best normalization according to the authors.
        inputs = torch.cat(
            [self.normalizer.state_normalizer(state), action.clamp(-1., 1.)],
            dim=-1)
        predicted_diff = self.diff_dynamics(inputs)
        next_states = state + self.normalizer.diff_normalizer(
            predicted_diff, inverse=True)
        # Clamp the prediction to [-100, 100] in normalized state space,
        # then map it back to the raw state space.
        clipped = self.normalizer.state_normalizer(next_states).clamp(-100, 100)
        next_states = self.normalizer.state_normalizer(clipped, inverse=True)
        return next_states
class ActorCritic(nn.Module):
    """Shared policy/value container.

    A Tanh feature MLP feeds a distribution head (``self.actor``) chosen by
    the action-space type; an independent MLP (``self.critic``) produces the
    state value.
    """

    def __init__(self, dim_state, action_space, actor_hidden_dims: List[int],
                 critic_hidden_dims: List[int], normalizer: nn.Module = None):
        super(ActorCritic, self).__init__()
        self.actor_feature = MLP(dim_state, actor_hidden_dims[-1],
                                 actor_hidden_dims[:-1], activation='Tanh',
                                 last_activation='Tanh')
        self.critic = MLP(dim_state, 1, critic_hidden_dims, activation='Tanh',
                          last_activation='Identity')
        self.normalizer = normalizer or nn.Identity()

        # Orthogonal weights (gain sqrt(2)), zero biases.
        init_ = lambda m: init(m, lambda x: nn.init.orthogonal_(x, np.sqrt(2)),
                               lambda x: nn.init.constant_(x, 0))
        self.actor_feature.init(init_, init_)
        self.critic.init(init_, init_)
        self.train()

        if action_space.__class__.__name__ == "Discrete":
            dim_action = action_space.n
            self.actor = CategoricalActorLayer(actor_hidden_dims[-1],
                                               dim_action)
        elif action_space.__class__.__name__ == "Box":
            dim_action = action_space.shape[0]
            self.actor = GaussianActorLayer(actor_hidden_dims[-1], dim_action,
                                            use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            dim_action = action_space.shape[0]
            self.actor = BernoulliActorLayer(actor_hidden_dims[-1], dim_action)
        else:
            # FIX: previously fell through silently, leaving ``self.actor``
            # undefined and failing later with an AttributeError.
            raise NotImplementedError(
                "unsupported action space: "
                f"{action_space.__class__.__name__}")

    def act(self, states, deterministic=False, reparamterize=False):
        """Sample (or take the mode of) an action and return
        (value, action, action_log_prob, dist_entropy)."""
        action_feature, value = self.actor_feature(states), self.critic(states)
        action_dist, *_ = self.actor(action_feature)
        if deterministic:
            action = action_dist.mode()
        else:
            if reparamterize:
                action = action_dist.rsample()
            else:
                action = action_dist.sample()
        action_log_prob = action_dist.log_probs(action)
        dist_entropy = action_dist.entropy().mean()
        return value, action, action_log_prob, dist_entropy

    def criticize(self, states):
        """Return state values only."""
        values = self.critic(states)
        return values

    def evaluate_action(self, state, action):
        """Return (value, log_prob of ``action``, mean entropy)."""
        action_feature, value = self.actor_feature(state), self.critic(state)
        # FIX: the actor head returns (dist, *extras) — see ``act``. The
        # original bound the whole tuple to ``action_dist``, which would fail
        # on ``log_prob``/``entropy``. Unpack it, and use the same
        # ``log_probs`` helper as ``act`` for consistency across heads.
        action_dist, *_ = self.actor(action_feature)
        action_log_probs = action_dist.log_probs(action)
        dist_entropy = action_dist.entropy().mean()
        return value, action_log_probs, dist_entropy
def __init__(self, dim_state: int, hidden_dims: List[int],
             state_normalizer=None, activation='Tanh'):
    """Build the state-value MLP (dim_state -> 1).

    Weights use normc initialization, biases are zeroed; falls back to an
    identity state normalizer when none is supplied.
    """
    super(VCritic, self).__init__()
    self.critic = MLP(dim_state, 1, hidden_dims, activation=activation)
    self.normalizer = state_normalizer or nn.Identity()

    def weight_init(module):
        return init(module, normc_init,
                    lambda bias: nn.init.constant_(bias, 0))

    self.critic.init(weight_init, weight_init)
def __init__(self, state_dim: int, action_dim: int, hidden_dims: List[int],
             normalizer: Normalizers):
    """Set up the ReLU MLP that maps (state, action) to a normalized
    state difference, with truncated-normal weights and zero biases."""
    super(Dynamics, self).__init__()
    self.dim_state = state_dim
    self.dim_action = action_dim
    self.normalizer = normalizer
    self.diff_dynamics = MLP(state_dim + action_dim, state_dim, hidden_dims,
                             activation='ReLU')
    initializer = lambda module: init(module, truncated_norm_init,
                                      lambda b: nn.init.constant_(b, 0))
    self.diff_dynamics.init(initializer, initializer)
def __init__(self, state_dim: int, action_space, hidden_dims: List[int],
             state_normalizer: Optional[nn.Module],
             use_limited_entropy=False):
    """Build the policy feature MLP and the distribution head matching the
    action-space type (Discrete / Box / MultiBinary).

    Raises:
        NotImplementedError: for any other action-space type.
    """
    super(Actor, self).__init__()
    self.state_dim = state_dim
    # NOTE(review): this stores the full action space object, not its
    # dimensionality, despite the name — kept as-is since other code may
    # read it; confirm before renaming.
    self.action_dim = action_space
    self.hidden_dims = hidden_dims
    self.actor_feature = MLP(state_dim, hidden_dims[-1], hidden_dims[:-1],
                             activation='Tanh', last_activation='Tanh')
    self.state_normalizer = state_normalizer or nn.Identity()
    if action_space.__class__.__name__ == "Discrete":
        action_dim = action_space.n
        self.actor = CategoricalActorLayer(hidden_dims[-1], action_dim)
    elif action_space.__class__.__name__ == "Box":
        action_dim = action_space.shape[0]
        if use_limited_entropy:
            self.actor = LimitedEntGaussianActorLayer(
                hidden_dims[-1], action_dim, use_state_dependent_std=False)
        else:
            self.actor = GaussianActorLayer(hidden_dims[-1], action_dim,
                                            use_state_dependent_std=False)
    elif action_space.__class__.__name__ == "MultiBinary":
        action_dim = action_space.shape[0]
        self.actor = BernoulliActorLayer(hidden_dims[-1], action_dim)
    else:
        # FIX: ``raise NotImplemented`` raises a TypeError — NotImplemented
        # is a constant, not an exception class.
        raise NotImplementedError
    init_ = lambda m: init(m, normc_init, lambda x: nn.init.constant_(
        x, 0))
    self.actor_feature.init(init_, init_)
def __init__(self, dim_state, action_space, actor_hidden_dims: List[int],
             critic_hidden_dims: List[int], normalizer: nn.Module = None):
    """Build the shared actor-feature MLP, the value MLP, and the
    distribution head matching the action-space type.

    Raises:
        NotImplementedError: for any other action-space type.
    """
    super(ActorCritic, self).__init__()
    self.actor_feature = MLP(dim_state, actor_hidden_dims[-1],
                             actor_hidden_dims[:-1], activation='Tanh',
                             last_activation='Tanh')
    self.critic = MLP(dim_state, 1, critic_hidden_dims, activation='Tanh',
                      last_activation='Identity')
    self.normalizer = normalizer or nn.Identity()

    # Orthogonal weights (gain sqrt(2)), zero biases.
    init_ = lambda m: init(m, lambda x: nn.init.orthogonal_(x, np.sqrt(2)),
                           lambda x: nn.init.constant_(x, 0))
    self.actor_feature.init(init_, init_)
    self.critic.init(init_, init_)
    self.train()

    if action_space.__class__.__name__ == "Discrete":
        dim_action = action_space.n
        self.actor = CategoricalActorLayer(actor_hidden_dims[-1], dim_action)
    elif action_space.__class__.__name__ == "Box":
        dim_action = action_space.shape[0]
        self.actor = GaussianActorLayer(actor_hidden_dims[-1], dim_action,
                                        use_state_dependent_std=False)
    elif action_space.__class__.__name__ == "MultiBinary":
        dim_action = action_space.shape[0]
        self.actor = BernoulliActorLayer(actor_hidden_dims[-1], dim_action)
    else:
        # FIX: previously fell through silently, leaving ``self.actor``
        # undefined and failing later with an AttributeError; the sibling
        # Actor class raises for unsupported spaces, so do the same here.
        raise NotImplementedError(
            f"unsupported action space: {action_space.__class__.__name__}")
def __init__(self, dim_state: int, dim_action: int,
             hidden_states: List[int]):
    """State-action value head: MLP mapping (state, action) -> scalar Q.

    FIX: the MLP arguments were passed as (input, hidden, output); every
    other ``MLP`` construction in this file uses the signature
    ``MLP(input_dim, output_dim, hidden_dims)``, so the hidden-layer list
    was being used as the output dimension.
    """
    super(QCritic, self).__init__()
    self.critic = MLP(dim_state + dim_action, 1, hidden_states)
class Actor(nn.Module):
    """Standalone policy: Tanh feature MLP plus a distribution head chosen
    by the action-space type (Discrete / Box / MultiBinary)."""

    def __init__(self, state_dim: int, action_space, hidden_dims: List[int],
                 state_normalizer: Optional[nn.Module],
                 use_limited_entropy=False):
        super(Actor, self).__init__()
        self.state_dim = state_dim
        # NOTE(review): stores the full action space object, not its
        # dimensionality, despite the name — kept as-is since other code may
        # read it; confirm before renaming.
        self.action_dim = action_space
        self.hidden_dims = hidden_dims
        self.actor_feature = MLP(state_dim, hidden_dims[-1], hidden_dims[:-1],
                                 activation='Tanh', last_activation='Tanh')
        self.state_normalizer = state_normalizer or nn.Identity()
        if action_space.__class__.__name__ == "Discrete":
            action_dim = action_space.n
            self.actor = CategoricalActorLayer(hidden_dims[-1], action_dim)
        elif action_space.__class__.__name__ == "Box":
            action_dim = action_space.shape[0]
            if use_limited_entropy:
                self.actor = LimitedEntGaussianActorLayer(
                    hidden_dims[-1], action_dim,
                    use_state_dependent_std=False)
            else:
                self.actor = GaussianActorLayer(hidden_dims[-1], action_dim,
                                                use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            action_dim = action_space.shape[0]
            self.actor = BernoulliActorLayer(hidden_dims[-1], action_dim)
        else:
            # FIX: ``raise NotImplemented`` raises a TypeError —
            # NotImplemented is a constant, not an exception class.
            raise NotImplementedError
        init_ = lambda m: init(m, normc_init, lambda x: nn.init.constant_(
            x, 0))
        self.actor_feature.init(init_, init_)

    def act(self, states, deterministic=False, reparamterize=False):
        """Sample (or take the mode of) an action.

        Returns (actions, log_probs, entropy, action_means, log_stds, stds).
        """
        states = self.state_normalizer(states)
        action_features = self.actor_feature(states)
        action_dists, action_means, log_stds = self.actor(action_features)
        if deterministic:
            actions = action_dists.mode()
        else:
            if reparamterize:
                actions = action_dists.rsample()
            else:
                actions = action_dists.sample()
        log_probs = action_dists.log_probs(actions)
        entropy = action_dists.entropy().mean()
        return actions, log_probs, entropy, action_means, log_stds, \
            log_stds.exp()

    def evaluate_action(self, states, actions):
        """Return (log_probs of ``actions``, mean entropy) under the
        current policy."""
        states = self.state_normalizer(states)
        action_feature = self.actor_feature(states)
        action_dist, *_ = self.actor(action_feature)
        log_probs = action_dist.log_probs(actions)
        entropy = action_dist.entropy().mean()
        return log_probs, entropy