Beispiel #1
0
    def __init__(self, num_inputs, num_outputs, use_state_dependent_std):
        """Gaussian policy head with mean and (limited-entropy) log-std outputs.

        Args:
            num_inputs: size of the incoming feature vector.
            num_outputs: action dimensionality.
            use_state_dependent_std: if True, predict log-std from the state
                through a linear layer; otherwise learn one free log-std
                parameter shared across states.
        """
        super(LimitedEntGaussianActorLayer, self).__init__()

        zero_bias = lambda x: nn.init.constant_(x, 0)

        self.actor_mean = nn.Linear(num_inputs, num_outputs)
        init(self.actor_mean, nn.init.orthogonal_, zero_bias)

        self.use_state_dependent_std = use_state_dependent_std
        if use_state_dependent_std:
            self.actor_logstd = nn.Linear(num_inputs, num_outputs)
            init(self.actor_logstd, nn.init.orthogonal_, zero_bias)
        else:
            # Single learnable log-std vector, independent of the input state.
            self.logstd = nn.Parameter(torch.zeros(num_outputs),
                                       requires_grad=True)
Beispiel #2
0
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 state_dependent_std,
                 init_w=1e-3):
        """Tanh-squashed Gaussian policy head.

        Args:
            num_inputs: feature dimension fed into the head.
            num_outputs: action dimensionality.
            state_dependent_std: if True, predict log-std from the state via
                a linear layer; otherwise learn one free log-std parameter.
            init_w: half-width of the uniform weight/bias initialization.
        """
        super(TanhGaussainActorLayer, self).__init__()

        uniform_ = lambda x: nn.init.uniform_(x, -init_w, init_w)

        self.actor_mean = nn.Linear(num_inputs, num_outputs)
        init(self.actor_mean, uniform_, uniform_)

        self.state_dependent_std = state_dependent_std
        if self.state_dependent_std:
            self.actor_logstd = nn.Linear(num_inputs, num_outputs)
            # BUG FIX: the original re-initialized self.actor_mean here,
            # leaving self.actor_logstd at PyTorch's default initialization.
            init(self.actor_logstd, uniform_, uniform_)
        else:
            self.logstd = nn.Parameter(torch.zeros(num_outputs),
                                       requires_grad=True)
Beispiel #3
0
    def __init__(self,
                 dim_state: int,
                 hidden_dims: List[int],
                 state_normalizer=None,
                 activation='Tanh'):
        """State-value critic: an MLP mapping (normalized) states to a scalar.

        Args:
            dim_state: input state dimensionality.
            hidden_dims: hidden layer widths for the MLP.
            state_normalizer: optional module applied to states; defaults to
                a pass-through identity when None.
            activation: activation name used throughout the MLP.
        """
        super(VCritic, self).__init__()

        self.critic = MLP(dim_state, 1, hidden_dims, activation=activation)
        self.normalizer = state_normalizer or nn.Identity()

        def init_(m):
            return init(m, normc_init, lambda x: nn.init.constant_(x, 0))

        self.critic.init(init_, init_)
Beispiel #4
0
    def __init__(self, state_dim: int, action_dim: int, hidden_dims: List[int],
                 normalizer: Normalizers):
        """Dynamics model predicting the state *difference* for (s, a) pairs.

        Args:
            state_dim: state dimensionality.
            action_dim: action dimensionality.
            hidden_dims: hidden layer widths for the MLP.
            normalizer: normalizer bundle used by this model.
        """
        super(Dynamics, self).__init__()

        self.dim_state = state_dim
        self.dim_action = action_dim
        self.normalizer = normalizer
        # Input is the concatenated (state, action); output has state shape.
        self.diff_dynamics = MLP(state_dim + action_dim,
                                 state_dim,
                                 hidden_dims,
                                 activation='ReLU')

        def init_(m):
            return init(m, truncated_norm_init,
                        lambda x: nn.init.constant_(x, 0))

        self.diff_dynamics.init(init_, init_)
Beispiel #5
0
    def __init__(self,
                 state_dim: int,
                 action_space,
                 hidden_dims: List[int],
                 state_normalizer: Optional[nn.Module],
                 use_limited_entropy=False):
        """Policy network: shared feature MLP plus an action-space-specific head.

        Args:
            state_dim: input state dimensionality.
            action_space: gym-style action space (Discrete, Box or MultiBinary).
            hidden_dims: widths; the last entry is the feature size, the rest
                are hidden layers of the feature extractor.
            state_normalizer: optional state normalizer; identity when None.
            use_limited_entropy: for Box spaces, use the limited-entropy
                Gaussian head instead of the plain Gaussian one.

        Raises:
            NotImplementedError: for unsupported action-space types.
        """
        super(Actor, self).__init__()
        self.state_dim = state_dim
        # NOTE(review): this stores the action *space* object under a
        # dim-named attribute — looks like a misnomer; confirm callers
        # before renaming or changing the stored value.
        self.action_dim = action_space
        self.hidden_dims = hidden_dims

        self.actor_feature = MLP(state_dim,
                                 hidden_dims[-1],
                                 hidden_dims[:-1],
                                 activation='Tanh',
                                 last_activation='Tanh')
        self.state_normalizer = state_normalizer or nn.Identity()

        if action_space.__class__.__name__ == "Discrete":
            action_dim = action_space.n
            self.actor = CategoricalActorLayer(hidden_dims[-1], action_dim)
        elif action_space.__class__.__name__ == "Box":
            action_dim = action_space.shape[0]
            if use_limited_entropy:
                self.actor = LimitedEntGaussianActorLayer(
                    hidden_dims[-1], action_dim, use_state_dependent_std=False)
            else:
                self.actor = GaussianActorLayer(hidden_dims[-1],
                                                action_dim,
                                                use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            action_dim = action_space.shape[0]
            self.actor = BernoulliActorLayer(hidden_dims[-1], action_dim)
        else:
            # BUG FIX: `raise NotImplemented` raises a TypeError in Python 3
            # because NotImplemented is not an exception class.
            raise NotImplementedError(
                'Unsupported action space: {}'.format(
                    action_space.__class__.__name__))

        init_ = lambda m: init(m, normc_init, lambda x: nn.init.constant_(
            x, 0))
        self.actor_feature.init(init_, init_)
Beispiel #6
0
    def __init__(self, dim_state, action_space, actor_hidden_dims: List[int], critic_hidden_dims: List[int],
                 normalizer: nn.Module = None):
        """Combined actor-critic: shared-nothing actor feature MLP and critic MLP.

        Args:
            dim_state: input state dimensionality.
            action_space: gym-style action space (Discrete, Box or MultiBinary).
            actor_hidden_dims: widths; last entry is the actor feature size.
            critic_hidden_dims: hidden layer widths for the critic MLP.
            normalizer: optional state normalizer module; identity when None.

        Raises:
            NotImplementedError: for unsupported action-space types.
        """
        super(ActorCritic, self).__init__()

        self.actor_feature = MLP(dim_state, actor_hidden_dims[-1], actor_hidden_dims[:-1],
                                 activation='Tanh', last_activation='Tanh')
        self.critic = MLP(dim_state, 1, critic_hidden_dims, activation='Tanh', last_activation='Identity')
        self.normalizer = normalizer or nn.Identity()

        init_ = lambda m: init(m, lambda x: nn.init.orthogonal_(x, np.sqrt(2)), lambda x: nn.init.constant_(x, 0))
        self.actor_feature.init(init_, init_)
        self.critic.init(init_, init_)

        self.train()

        if action_space.__class__.__name__ == "Discrete":
            dim_action = action_space.n
            self.actor = CategoricalActorLayer(actor_hidden_dims[-1], dim_action)
        elif action_space.__class__.__name__ == "Box":
            dim_action = action_space.shape[0]
            self.actor = GaussianActorLayer(actor_hidden_dims[-1], dim_action, use_state_dependent_std=False)
        elif action_space.__class__.__name__ == "MultiBinary":
            dim_action = action_space.shape[0]
            self.actor = BernoulliActorLayer(actor_hidden_dims[-1], dim_action)
        else:
            # FIX: the original silently fell through here, leaving self.actor
            # unset and deferring the failure to a confusing AttributeError.
            raise NotImplementedError(
                'Unsupported action space: {}'.format(
                    action_space.__class__.__name__))
Beispiel #7
0
    def __init__(self, num_inputs, num_outputs):
        """Linear head producing logits for a categorical (discrete) policy.

        Args:
            num_inputs: size of the incoming feature vector.
            num_outputs: number of discrete actions.
        """
        super(CategoricalActorLayer, self).__init__()

        self.actor = nn.Linear(num_inputs, num_outputs)
        # Small orthogonal gain (0.01) keeps the initial logits close to
        # uniform; biases start at zero.
        init(self.actor,
             lambda w: nn.init.orthogonal_(w, 0.01),
             lambda b: nn.init.constant_(b, 0))
Beispiel #8
0
    def __init__(self, num_inputs, num_outputs):
        """Linear head producing per-dimension Bernoulli logits.

        Args:
            num_inputs: size of the incoming feature vector.
            num_outputs: number of binary action dimensions.
        """
        super(BernoulliActorLayer, self).__init__()

        self.actor = nn.Linear(num_inputs, num_outputs)
        # Orthogonal weights with zero biases.
        init(self.actor,
             nn.init.orthogonal_,
             lambda b: nn.init.constant_(b, 0))