Example #1
import torch
import torch.nn as nn
from typing import List


class Dynamics(nn.Module):
    def __init__(self, state_dim: int, action_dim: int, hidden_dims: List[int],
                 normalizer: Normalizers):
        super(Dynamics, self).__init__()
        self.dim_state = state_dim
        self.dim_action = action_dim
        self.normalizer = normalizer
        self.diff_dynamics = MLP(state_dim + action_dim,
                                 state_dim,
                                 hidden_dims,
                                 activation='ReLU')

        init_ = lambda m: init(m, truncated_norm_init,
                               lambda x: nn.init.constant_(x, 0))
        self.diff_dynamics.init(init_, init_)

    def forward(self, state, action):
        # Clipping the action to [-1, 1] is the best normalization,
        # according to the authors.
        x = torch.cat([self.normalizer.state_normalizer(state),
                       action.clamp(-1., 1.)], dim=-1)
        # The network predicts the normalized state difference.
        normalized_diff = self.diff_dynamics(x)
        next_states = state + self.normalizer.diff_normalizer(normalized_diff,
                                                              inverse=True)
        # Clamp in normalized space to keep model rollouts bounded, then
        # map back to the original state scale.
        next_states = self.normalizer.state_normalizer(
            self.normalizer.state_normalizer(next_states).clamp(-100, 100),
            inverse=True)
        return next_states
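Every example here calls into a project-specific MLP wrapper plus init helpers (init, truncated_norm_init, normc_init) and a Normalizers object that the snippets do not show. Below is a minimal sketch of the first few, with signatures inferred from the call sites rather than taken from the source repository; the Normalizers type stays project-specific.

import torch
import torch.nn as nn
from typing import List


def init(module: nn.Module, weight_init, bias_init):
    # Apply the given initializers to one linear layer's weight and bias.
    weight_init(module.weight.data)
    bias_init(module.bias.data)
    return module


def truncated_norm_init(tensor, std=0.02):
    # Truncated-normal weight init; std=0.02 is an assumption.
    return nn.init.trunc_normal_(tensor, std=std)


class MLP(nn.Module):
    # Plain fully connected net: input_dim -> hidden_dims -> output_dim.
    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int],
                 activation: str = 'ReLU', last_activation: str = 'Identity'):
        super().__init__()
        dims = [input_dim] + list(hidden_dims)
        layers = []
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            layers += [nn.Linear(d_in, d_out), getattr(nn, activation)()]
        layers += [nn.Linear(dims[-1], output_dim),
                   getattr(nn, last_activation)()]
        self.net = nn.Sequential(*layers)

    def init(self, hidden_init, output_init):
        # The examples pass two initializer callbacks: one for the hidden
        # layers and one for the output layer.
        linears = [m for m in self.net if isinstance(m, nn.Linear)]
        for m in linears[:-1]:
            hidden_init(m)
        output_init(linears[-1])

    def forward(self, x):
        return self.net(x)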
Example #2
import numpy as np
import torch.nn as nn
from typing import List


class ActorCritic(nn.Module):

    def __init__(self, dim_state, action_space, actor_hidden_dims: List[int], critic_hidden_dims: List[int],
                 normalizer: nn.Module = None):
        super(ActorCritic, self).__init__()

        self.actor_feature = MLP(dim_state, actor_hidden_dims[-1], actor_hidden_dims[:-1],
                                 activation='Tanh', last_activation='Tanh')
        self.critic = MLP(dim_state, 1, critic_hidden_dims, activation='Tanh', last_activation='Identity')
        self.normalizer = normalizer or nn.Identity()

        init_ = lambda m: init(m, lambda x: nn.init.orthogonal_(x, np.sqrt(2)), lambda x: nn.init.constant_(x, 0))
        self.actor_feature.init(init_, init_)
        self.critic.init(init_, init_)

        self.train()

        if action_space.__class__.__name__ == "Discrete":
            dim_action = action_space.n
            self.actor = CategoricalActorLayer(actor_hidden_dims[-1], dim_action)
        elif action_space.__class__.__name__ == "Box":
            dim_action = action_space.shape[0]
            self.actor = GaussianActorLayer(actor_hidden_dims[-1], dim_action, use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            dim_action = action_space.shape[0]
            self.actor = BernoulliActorLayer(actor_hidden_dims[-1], dim_action)
        else:
            raise NotImplementedError

    def act(self, states, deterministic=False, reparametrize=False):
        action_feature, value = self.actor_feature(states), self.critic(states)
        action_dist, *_ = self.actor(action_feature)

        if deterministic:
            action = action_dist.mode()
        elif reparametrize:
            action = action_dist.rsample()
        else:
            action = action_dist.sample()

        action_log_prob = action_dist.log_probs(action)
        dist_entropy = action_dist.entropy().mean()

        return value, action, action_log_prob, dist_entropy

    def criticize(self, states):
        values = self.critic(states)
        return values

    def evaluate_action(self, state, action):
        action_feature, value = self.actor_feature(state), self.critic(state)
        action_dist, *_ = self.actor(action_feature)

        action_log_probs = action_dist.log_probs(action)
        dist_entropy = action_dist.entropy().mean()

        return value, action_log_probs, dist_entropy
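ActorCritic calls .mode() and .log_probs() on the returned distribution, an interface the stock torch.distributions classes do not expose, so repositories in this style typically subclass them. A sketch of what CategoricalActorLayer might wrap, assumed rather than taken from the original code:

import torch.nn as nn
from torch.distributions import Categorical


class FixedCategorical(Categorical):
    # Adds the .mode()/.log_probs() interface the examples call.
    def mode(self):
        return self.probs.argmax(dim=-1, keepdim=True)

    def log_probs(self, actions):
        return super().log_prob(actions.squeeze(-1)).unsqueeze(-1)


class CategoricalActorLayer(nn.Module):
    def __init__(self, num_inputs: int, num_actions: int):
        super().__init__()
        self.linear = nn.Linear(num_inputs, num_actions)

    def forward(self, x):
        # Returned as a tuple so call sites can unpack with
        # `action_dist, *_ = self.actor(...)`; the original layer
        # likely returns extra tensors as well.
        return (FixedCategorical(logits=self.linear(x)),)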
Example #3
import torch.nn as nn
from typing import List


class VCritic(nn.Module):
    def __init__(self,
                 dim_state: int,
                 hidden_dims: List[int],
                 state_normalizer=None,
                 activation='Tanh'):
        super(VCritic, self).__init__()
        self.critic = MLP(dim_state, 1, hidden_dims, activation=activation)
        self.normalizer = state_normalizer or nn.Identity()

        init_ = lambda m: init(m, normc_init,
                               lambda x: nn.init.constant_(x, 0))
        self.critic.init(init_, init_)
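normc_init is also not shown. Below is a common PyTorch port of the Baselines column-normalized initializer, applied here to a layer's weight tensor; the normalization axis and default scale are assumptions:

import torch


def normc_init(tensor: torch.Tensor, std: float = 1.0):
    # Sample N(0, 1), then rescale so each output row has norm `std`
    # (the PyTorch analogue of Baselines' normalized-columns init).
    with torch.no_grad():
        tensor.normal_(0, 1)
        tensor *= std / tensor.pow(2).sum(dim=1, keepdim=True).sqrt()
    return tensor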
Example #4
import torch
import torch.nn as nn
from typing import List


class QCritic(nn.Module):
    def __init__(self, dim_state: int, dim_action: int,
                 hidden_dims: List[int]):
        super(QCritic, self).__init__()
        # MLP takes (input_dim, output_dim, hidden_dims), matching the
        # other examples; the Q-head maps state-action pairs to a scalar.
        self.critic = MLP(dim_state + dim_action, 1, hidden_dims)
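    # The snippet stops at the constructor; a forward pass would
    # presumably join state and action along the feature dimension and
    # regress a scalar Q-value (an assumed sketch, not the original code):
    def forward(self, state, action):
        return self.critic(torch.cat([state, action], dim=-1))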
Example #5
import torch.nn as nn
from typing import List, Optional


class Actor(nn.Module):
    def __init__(self,
                 state_dim: int,
                 action_space,
                 hidden_dims: List[int],
                 state_normalizer: Optional[nn.Module],
                 use_limited_entropy=False):
        super(Actor, self).__init__()
        self.state_dim = state_dim
        self.action_space = action_space
        self.hidden_dims = hidden_dims

        self.actor_feature = MLP(state_dim,
                                 hidden_dims[-1],
                                 hidden_dims[:-1],
                                 activation='Tanh',
                                 last_activation='Tanh')
        self.state_normalizer = state_normalizer or nn.Identity()

        if action_space.__class__.__name__ == "Discrete":
            action_dim = action_space.n
            self.actor = CategoricalActorLayer(hidden_dims[-1], action_dim)
        elif action_space.__class__.__name__ == "Box":
            action_dim = action_space.shape[0]
            if use_limited_entropy:
                self.actor = LimitedEntGaussianActorLayer(
                    hidden_dims[-1], action_dim, use_state_dependent_std=False)
            else:
                self.actor = GaussianActorLayer(hidden_dims[-1],
                                                action_dim,
                                                use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            action_dim = action_space.shape[0]
            self.actor = BernoulliActorLayer(hidden_dims[-1], action_dim)
        else:
            raise NotImplementedError

        init_ = lambda m: init(m, normc_init,
                               lambda x: nn.init.constant_(x, 0))
        self.actor_feature.init(init_, init_)

    def act(self, states, deterministic=False, reparametrize=False):
        states = self.state_normalizer(states)
        action_features = self.actor_feature(states)
        action_dists, action_means, log_stds = self.actor(action_features)

        if deterministic:
            actions = action_dists.mode()
        elif reparametrize:
            actions = action_dists.rsample()
        else:
            actions = action_dists.sample()

        log_probs = action_dists.log_probs(actions)
        entropy = action_dists.entropy().mean()

        return (actions, log_probs, entropy, action_means, log_stds,
                log_stds.exp())

    def evaluate_action(self, states, actions):
        states = self.state_normalizer(states)
        action_feature = self.actor_feature(states)
        action_dist, *_ = self.actor(action_feature)

        log_probs = action_dist.log_probs(actions)
        entropy = action_dist.entropy().mean()

        return log_probs, entropy
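act() unpacks self.actor(action_features) into a distribution, the action means, and the log-stds, so for Box spaces GaussianActorLayer plausibly looks like the sketch below, with a state-independent, learned log-std; the details are assumptions, not the original layer:

import torch
import torch.nn as nn
from torch.distributions import Normal


class FixedNormal(Normal):
    # Diagonal Gaussian exposing the joint (summed over action dims)
    # log-prob/entropy interface the examples call.
    def mode(self):
        return self.mean

    def log_probs(self, actions):
        return super().log_prob(actions).sum(-1, keepdim=True)

    def entropy(self):
        return super().entropy().sum(-1)


class GaussianActorLayer(nn.Module):
    def __init__(self, num_inputs: int, num_actions: int,
                 use_state_dependent_std: bool = False):
        super().__init__()
        self.mean_head = nn.Linear(num_inputs, num_actions)
        self.use_state_dependent_std = use_state_dependent_std
        if use_state_dependent_std:
            self.log_std_head = nn.Linear(num_inputs, num_actions)
        else:
            # One learned log-std per action dimension, shared across states.
            self.log_std = nn.Parameter(torch.zeros(num_actions))

    def forward(self, x):
        mean = self.mean_head(x)
        if self.use_state_dependent_std:
            log_std = self.log_std_head(x)
        else:
            log_std = self.log_std.expand_as(mean)
        return FixedNormal(mean, log_std.exp()), mean, log_std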