Esempio n. 1
0
 def __init__(self, state_size, action_size, fc1_units=256, fc2_units=128):
     """Create the actor's FCNet and move the module to DEVICE.

     Args:
         state_size: First positional argument handed to FCNet.
         action_size: Second positional argument handed to FCNet.
         fc1_units: First entry of the hidden-size list (default 256).
         fc2_units: Second entry of the hidden-size list (default 128).
     """
     super(Actor, self).__init__()
     hidden_layout = [fc1_units, fc2_units]
     # torch.tanh is passed as FCNet's output_func; presumably it is applied
     # to the network output to bound it -- confirm in FCNet.
     self.net = FCNet(
         state_size,
         action_size,
         hidden_layout,
         random_seed=RANDOM_SEED,
         output_func=torch.tanh,
     )
     self.to(DEVICE)
Esempio n. 2
0
 def __init__(self, state_size, action_size, shared_feature_size, fc_units):
     """Assemble the shared network plus the actor and critic heads.

     Args:
         state_size: Input size of the shared FCNet.
         action_size: Action size given to the Softmax actor head.
         shared_feature_size: Output size of the shared FCNet; also the
             input size of both heads.
         fc_units: Hidden sizes forwarded to the shared FCNet.
     """
     super(CategoricalActorCritic, self).__init__()
     # Shared trunk: state -> features of width shared_feature_size.
     self.shared_net = FCNet(
         input_size=state_size,
         output_size=shared_feature_size,
         hidden_sizes=fc_units,
         random_seed=RANDOM_SEED,
     )
     # Actor head consumes the shared features.
     self.actor = Softmax(
         feature_size=shared_feature_size,
         action_size=action_size,
     )
     # Critic head: a single linear unit on top of the shared features.
     self.critic = nn.Linear(shared_feature_size, 1)
     self.to(DEVICE)
Esempio n. 3
0
class Critic(nn.Module):
    """Critic module that wraps a single FCNet.

    The wrapped FCNet is built with ``1`` as its second positional argument
    (presumably the output size, i.e. a scalar value estimate -- confirm
    against FCNet's signature).
    """

    def __init__(self, state_size, fc1_units=256, fc2_units=128):
        """Build the wrapped FCNet and move this module to DEVICE.

        Args:
            state_size: First positional argument handed to FCNet.
            fc1_units: First entry of the hidden-size list (default 256).
            fc2_units: Second entry of the hidden-size list (default 128).
        """
        super().__init__()
        hidden_layout = [fc1_units, fc2_units]
        self.net = FCNet(state_size, 1, hidden_layout, random_seed=RANDOM_SEED)
        self.to(DEVICE)

    def reset_parameters(self):
        """Delegate parameter re-initialisation to the wrapped network."""
        wrapped = self.net
        wrapped.reset_parameters()

    def forward(self, state_action):
        """Return the wrapped network's output for ``state_action``."""
        output = self.net.forward(state_action)
        return output
Esempio n. 4
0
class Actor(nn.Module):
    """Actor module that wraps a single FCNet configured with torch.tanh.

    torch.tanh is passed as FCNet's ``output_func``; presumably FCNet applies
    it to its final output -- confirm in FCNet.
    """

    def __init__(self, state_size, action_size, fc1_units=256, fc2_units=128):
        """Build the wrapped FCNet and move this module to DEVICE.

        Args:
            state_size: First positional argument handed to FCNet.
            action_size: Second positional argument handed to FCNet.
            fc1_units: First entry of the hidden-size list (default 256).
            fc2_units: Second entry of the hidden-size list (default 128).
        """
        super().__init__()
        hidden_layout = [fc1_units, fc2_units]
        self.net = FCNet(
            state_size,
            action_size,
            hidden_layout,
            random_seed=RANDOM_SEED,
            output_func=torch.tanh,
        )
        self.to(DEVICE)

    def reset_parameters(self):
        """Delegate parameter re-initialisation to the wrapped network."""
        wrapped = self.net
        wrapped.reset_parameters()

    def forward(self, state):
        """Return the wrapped network's output for ``state``."""
        output = self.net.forward(state)
        return output
Esempio n. 5
0
class Actor(nn.Module):
    """Discrete-action actor: FCNet logits fed into a Categorical distribution."""

    def __init__(self, state_size, action_size, fc1_units=64, fc2_units=32):
        """Build the wrapped FCNet and move this module to DEVICE.

        Args:
            state_size: First positional argument handed to FCNet.
            action_size: Second positional argument handed to FCNet.
            fc1_units: First entry of the hidden-size list (default 64).
            fc2_units: Second entry of the hidden-size list (default 32).
        """
        super().__init__()
        hidden_layout = [fc1_units, fc2_units]
        self.net = FCNet(
            state_size,
            action_size,
            hidden_layout,
            random_seed=RANDOM_SEED,
        )
        self.to(DEVICE)

    def reset_parameters(self):
        """Delegate parameter re-initialisation to the wrapped network."""
        wrapped = self.net
        wrapped.reset_parameters()

    def forward(self, state):
        """Sample an action from the categorical policy for ``state``.

        Returns:
            A tuple ``(entropy, action, log_prob)``. ``action`` and
            ``log_prob`` each get a trailing singleton dimension appended.
            ``entropy`` is the distribution entropy summed over its last
            dimension, then unsqueezed.
        """
        policy = torch.distributions.Categorical(logits=self.net.forward(state))
        sampled = policy.sample()  # one action index per batch element
        sampled_log_prob = policy.log_prob(sampled)  # same shape as sampled
        # NOTE(review): Categorical.entropy() already yields one value per
        # batch element, so sum(-1) reduces across the batch for 2-D logits
        # and the result has shape (1,), unlike action/log_prob. Confirm
        # this is what callers expect.
        summed_entropy = policy.entropy().sum(-1)
        return (
            summed_entropy.unsqueeze(-1),
            sampled.unsqueeze(-1),
            sampled_log_prob.unsqueeze(-1),
        )
Esempio n. 6
0
class CategoricalActorCritic(nn.Module):
    """Actor-critic module: a shared FCNet trunk feeding two heads.

    The actor head is a ``Softmax`` module and the critic head is a single
    ``nn.Linear`` unit; both consume the shared features.
    """

    def __init__(self, state_size, action_size, shared_feature_size, fc_units):
        """Assemble the trunk and both heads, then move the module to DEVICE.

        Args:
            state_size: Input size of the shared FCNet.
            action_size: Action size given to the Softmax actor head.
            shared_feature_size: Output size of the shared FCNet; also the
                input size of both heads.
            fc_units: Hidden sizes forwarded to the shared FCNet.
        """
        super().__init__()
        self.shared_net = FCNet(
            input_size=state_size,
            output_size=shared_feature_size,
            hidden_sizes=fc_units,
            random_seed=RANDOM_SEED,
        )
        self.actor = Softmax(
            feature_size=shared_feature_size,
            action_size=action_size,
        )
        self.critic = nn.Linear(shared_feature_size, 1)
        self.to(DEVICE)

    def forward(self, states):
        """Run the trunk once and evaluate both heads on its features.

        Returns:
            A tuple ``(entropy, actions, log_prob, critic_v)``: the three
            values produced by the actor head followed by the critic output.
        """
        shared = self.shared_net.forward(states)
        # Actor is evaluated before the critic, matching the original order.
        actor_out = self.actor.forward(features=shared)
        entropy, actions, log_prob = actor_out
        value = self.critic.forward(shared)
        return entropy, actions, log_prob, value
Esempio n. 7
0
 def __init__(self, state_size, fc1_units=256, fc2_units=128):
     """Create the critic's FCNet and move the module to DEVICE.

     Args:
         state_size: First positional argument handed to FCNet.
         fc1_units: First entry of the hidden-size list (default 256).
         fc2_units: Second entry of the hidden-size list (default 128).
     """
     super(Critic, self).__init__()
     hidden_layout = [fc1_units, fc2_units]
     # The literal 1 is FCNet's second positional argument (presumably the
     # output size, i.e. one scalar per input -- confirm in FCNet).
     self.net = FCNet(state_size, 1, hidden_layout, random_seed=RANDOM_SEED)
     self.to(DEVICE)
Esempio n. 8
0
 def __init__(self, state_size, action_size, fc1_units=64, fc2_units=32):
     """Create the actor's FCNet and move the module to DEVICE.

     Args:
         state_size: First positional argument handed to FCNet.
         action_size: Second positional argument handed to FCNet.
         fc1_units: First entry of the hidden-size list (default 64).
         fc2_units: Second entry of the hidden-size list (default 32).
     """
     super(Actor, self).__init__()
     hidden_layout = [fc1_units, fc2_units]
     self.net = FCNet(
         state_size,
         action_size,
         hidden_layout,
         random_seed=RANDOM_SEED,
     )
     self.to(DEVICE)
Esempio n. 9
0
 def __init__(self, state_size, action_size, fc1_units=256, fc2_units=128):
     """Create the navigation model's FCNet and move the module to DEVICE.

     Args:
         state_size: First positional argument handed to FCNet.
         action_size: Second positional argument handed to FCNet.
         fc1_units: First entry of the hidden-size list (default 256).
         fc2_units: Second entry of the hidden-size list (default 128).
     """
     super(NavModel, self).__init__()
     hidden_layout = [fc1_units, fc2_units]
     self.net = FCNet(
         state_size,
         action_size,
         hidden_layout,
         random_seed=RANDOM_SEED,
     )
     self.to(DEVICE)