def __init__(self, num_inputs, num_outputs, use_state_dependent_std):
    """Create the mean head and the log-std source of the Gaussian actor layer.

    Args:
        num_inputs: Input feature size given to every ``nn.Linear`` head.
        num_outputs: Output size of every head; also the length of the
            standalone ``logstd`` parameter.
        use_state_dependent_std: When truthy, a second linear head
            (``actor_logstd``) is built; otherwise a single zero-filled
            ``logstd`` parameter with ``requires_grad=True`` is registered.
    """
    super(GaussianActorLayer, self).__init__()

    def _fill_zero(x):
        # Third argument handed to `init`; presumably the bias initializer
        # -- confirm against the definition of `init`.
        return nn.init.constant_(x, 0)

    # Mean head: built first so parameter registration order is unchanged.
    self.actor_mean = nn.Linear(num_inputs, num_outputs)
    init(self.actor_mean, nn.init.orthogonal_, _fill_zero)

    self.use_state_dependent_std = use_state_dependent_std
    if not self.use_state_dependent_std:
        # State-independent case: one parameter vector of zeros.
        self.logstd = nn.Parameter(torch.zeros(num_outputs), requires_grad=True)
    else:
        # State-dependent case: a separate linear head, set up like the mean head.
        self.actor_logstd = nn.Linear(num_inputs, num_outputs)
        init(self.actor_logstd, nn.init.orthogonal_, _fill_zero)
def __init__(self, num_inputs, num_outputs, use_state_dependent_std, init_w=1e-3):
    """Create the mean head and the log-std source of the tanh-Gaussian actor layer.

    Args:
        num_inputs: Input feature size given to every ``nn.Linear`` head.
        num_outputs: Output size of every head; also the length of the
            standalone ``logstd`` parameter.
        use_state_dependent_std: When truthy, ``logstd_actor`` is a linear
            head; otherwise it is a callable that ignores its argument and
            returns the ``logstd`` parameter.
        init_w: Half-width of the ``[-init_w, init_w]`` uniform range used
            for both initializers passed to ``init``.
    """
    super(TanhGaussainActorLayer, self).__init__()

    def _uniform_fill(x):
        # Same uniform range for both initializer slots of `init`.
        return nn.init.uniform_(x, -init_w, init_w)

    # Mean head: built first so parameter registration order is unchanged.
    self.mean_actor = nn.Linear(num_inputs, num_outputs)
    init(self.mean_actor, _uniform_fill, _uniform_fill)

    self.state_dependent_std = use_state_dependent_std
    if not self.state_dependent_std:
        self.logstd = nn.Parameter(torch.zeros(num_outputs), requires_grad=True)

        def _shared_logstd(_):
            # Gives `logstd_actor` the same one-argument call shape as the
            # linear head, while always returning the shared parameter.
            return self.logstd

        self.logstd_actor = _shared_logstd
    else:
        self.logstd_actor = nn.Linear(num_inputs, num_outputs)
        init(self.logstd_actor, _uniform_fill, _uniform_fill)
def init_(m):
    """Run ``init`` on ``m`` with ``truncated_norm_init`` and a constant-zero filler.

    The value produced by ``init`` is discarded, so this helper returns ``None``.
    """
    def _fill_zero(x):
        # Third argument handed to `init`; presumably the bias initializer
        # -- confirm against the definition of `init`.
        return nn.init.constant_(x, 0)

    init(m, truncated_norm_init, _fill_zero)
def init_last_(m):
    """Run ``init`` on ``m`` using a uniform ``[-init_w, init_w]`` filler for both slots.

    ``init_w`` is read from the surrounding scope. The value produced by
    ``init`` is discarded, so this helper returns ``None``.
    """
    def _uniform_fill(x):
        return nn.init.uniform_(x, -init_w, init_w)

    # The same filler is supplied for both initializer arguments of `init`.
    init(m, _uniform_fill, _uniform_fill)
def init_(m):
    """Run ``init`` on ``m`` with ``fanin_init`` and a constant ``init_b`` filler.

    ``init_b`` is read from the surrounding scope. The value produced by
    ``init`` is discarded, so this helper returns ``None``.
    """
    def _fill_constant(x):
        # Third argument handed to `init`; presumably the bias initializer
        # -- confirm against the definition of `init`.
        return nn.init.constant_(x, init_b)

    init(m, fanin_init, _fill_constant)
def __init__(self, num_inputs, num_outputs):
    """Create the single linear logit head of the Bernoulli actor layer.

    Args:
        num_inputs: Input feature size of the ``nn.Linear`` head.
        num_outputs: Output size of the ``nn.Linear`` head.
    """
    super(BernoulliActorLayer, self).__init__()

    def _fill_zero(x):
        # Third argument handed to `init`; presumably the bias initializer
        # -- confirm against the definition of `init`.
        return nn.init.constant_(x, 0)

    logit_head = nn.Linear(num_inputs, num_outputs)
    self.logit_actor = logit_head
    init(logit_head, nn.init.orthogonal_, _fill_zero)
def __init__(self, num_inputs, num_outputs):
    """Create the single linear logit head of the categorical actor layer.

    Args:
        num_inputs: Input feature size of the ``nn.Linear`` head.
        num_outputs: Output size of the ``nn.Linear`` head.
    """
    super(CategoricalActorLayer, self).__init__()

    def _orthogonal_small(x):
        # 0.01 is passed positionally as the second argument of
        # `orthogonal_` (its `gain`).
        return nn.init.orthogonal_(x, 0.01)

    def _fill_zero(x):
        # Third argument handed to `init`; presumably the bias initializer
        # -- confirm against the definition of `init`.
        return nn.init.constant_(x, 0)

    logit_head = nn.Linear(num_inputs, num_outputs)
    self.logit_actor = logit_head
    init(logit_head, _orthogonal_small, _fill_zero)