Beispiel #1
0
def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5):
    """Build a convolutional network ending in a noisy categorical dueling head.

    Args:
        env: Environment; only ``env.action_space.n`` is read.
        frames: Number of input channels of the first convolution.
        hidden: Width of the hidden layer inside each dueling stream.
        atoms: Output size of the first stream; the second stream emits
            ``env.action_space.n * atoms`` outputs.
        sigma: Forwarded as ``sigma_init`` to every ``NoisyFactorizedLinear``.

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten and
        a ``CategoricalDueling`` module wrapping the two streams.
    """
    # 3136 == 64 * 7 * 7, the flattened size this conv stack yields for
    # 84x84 inputs (input resolution is an assumption -- confirm with caller).
    flat_size = 3136

    def noisy_stream(outputs):
        # Two noisy linear layers; the last one is built with init_scale=0.
        return nn.Sequential(
            nn.NoisyFactorizedLinear(flat_size, hidden, sigma_init=sigma),
            nn.ReLU(),
            nn.NoisyFactorizedLinear(
                hidden, outputs, init_scale=0, sigma_init=sigma
            ),
        )

    layers = [nn.Scale(1 / 255)]
    conv_specs = ((frames, 32, 8, 4), (32, 64, 4, 2), (64, 64, 3, 1))
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.append(nn.Flatten())

    # Streams are created in the same order as the positional arguments.
    first_stream = noisy_stream(atoms)
    second_stream = noisy_stream(env.action_space.n * atoms)
    layers.append(nn.CategoricalDueling(first_stream, second_stream))
    return nn.Sequential(*layers)
Beispiel #2
0
def dueling_fc_relu_q(env):
    """Build a fully connected dueling network for vector observations.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width
            and ``env.action_space.n`` the width of the second stream.

    Returns:
        ``nn.Sequential(Flatten, Dueling(stream_1, stream_2))`` where each
        stream is Linear(256) -> ReLU -> Linear.
    """
    flatten = nn.Flatten()

    # First stream: a single scalar output.
    scalar_stream = nn.Sequential(
        nn.Linear(env.state_space.shape[0], 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    # Second stream: one output per discrete action.
    per_action_stream = nn.Sequential(
        nn.Linear(env.state_space.shape[0], 256),
        nn.ReLU(),
        nn.Linear(256, env.action_space.n),
    )
    return nn.Sequential(flatten, nn.Dueling(scalar_stream, per_action_stream))
Beispiel #3
0
def nature_ddqn(env, frames=4):
    """Build a convolutional dueling network.

    Args:
        env: Environment; only ``env.action_space.n`` is read.
        frames: Number of input channels of the first convolution.

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten and
        a ``Dueling`` module whose streams are Linear(512) -> ReLU -> Linear0.
    """
    layers = [nn.Scale(1 / 255)]
    for in_channels, out_channels, kernel, stride in (
        (frames, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    ):
        layers += [
            nn.Conv2d(in_channels, out_channels, kernel, stride=stride),
            nn.ReLU(),
        ]
    layers.append(nn.Flatten())

    # 3136 == 64 * 7 * 7: flattened conv output for 84x84 inputs
    # (input resolution assumed -- confirm with caller).
    scalar_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )
    layers.append(nn.Dueling(scalar_stream, per_action_stream))
    return nn.Sequential(*layers)
Beispiel #4
0
def fc_actor_critic(env, hidden1=400, hidden2=300):
    """Build the three fully connected parts of an actor-critic model.

    Args:
        env: Environment; ``env.state_space.shape[0]`` and
            ``env.action_space.shape[0]`` are read.
        hidden1: Output width of the shared feature layer.
        hidden2: Hidden width of the value and policy heads.

    Returns:
        Tuple ``(features, v, policy)`` of ``nn.Sequential`` modules.
        ``features`` takes ``state_dim + 1`` inputs, ``v`` ends in a single
        output and ``policy`` ends in ``2 * action_dim`` outputs.
    """

    def make_head(outputs):
        # Both heads share the same shape apart from the output width.
        return nn.Sequential(
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Linear(hidden2, outputs),
        )

    # The "+ 1" widens the input by one extra feature beyond the state
    # vector (its meaning is not visible here -- confirm with the caller).
    input_width = env.state_space.shape[0] + 1
    features = nn.Sequential(nn.Linear(input_width, hidden1), nn.ReLU())

    v = make_head(1)
    policy = make_head(env.action_space.shape[0] * 2)
    return features, v, policy
Beispiel #5
0
 def __init__(self, env, hidden1=400, hidden2=300):
     """Create the Tanh MLP and the per-action ``log_stds`` parameter.

     Args:
         env: Environment; ``env.state_space.shape[0]`` and
             ``env.action_space.shape[0]`` are read.
         hidden1: Width of the first hidden layer.
         hidden2: Width of the second hidden layer.
     """
     super().__init__()
     # Input is one wider than the state vector (extra feature's meaning
     # is not visible here -- confirm with the caller).
     mlp_layers = [
         nn.Linear(env.state_space.shape[0] + 1, hidden1),
         nn.Tanh(),
         nn.Linear(hidden1, hidden2),
         nn.Tanh(),
         nn.Linear(hidden2, env.action_space.shape[0]),
     ]
     self.model = nn.Sequential(*mlp_layers)
     # One learnable value per action dimension, initialised to zero.
     initial_log_stds = torch.zeros(env.action_space.shape[0])
     self.log_stds = nn.Parameter(initial_log_stds)
 def setUp(self):
     """Create a seeded model, its optimizer, the action space and the policy."""
     # Fixed seed so everything built below is reproducible between runs.
     torch.manual_seed(2)
     linear = nn.Linear0(STATE_DIM, ACTION_DIM)
     self.model = nn.Sequential(linear)
     self.optimizer = torch.optim.RMSprop(self.model.parameters(), lr=0.01)
     # Three-dimensional box with bounds [-1, 1] on every axis.
     lower = np.array([-1, -1, -1])
     upper = np.array([1, 1, 1])
     self.space = Box(lower, upper)
     # NOTE(review): 0.5 is passed positionally; presumably a noise level
     # -- confirm against the DeterministicPolicy signature.
     self.policy = DeterministicPolicy(
         self.model,
         self.optimizer,
         self.space,
         0.5,
     )
Beispiel #7
0
def critic(env, hidden1=400, hidden2=300):
    """Build a three-layer ReLU MLP that outputs a single value.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        The ``nn.Sequential`` after ``init_weights`` has been applied to it
        and it has been cast with ``.float()``.
    """
    widths = (env.state_space.shape[0], hidden1, hidden2)
    layers = []
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    layers.append(nn.Linear(hidden2, 1))

    net = nn.Sequential(*layers)
    # init_weights is defined elsewhere in the file; apply() runs it on
    # every submodule and on the container itself.
    net.apply(init_weights)
    net.float()
    return net
Beispiel #8
0
def fc_relu_q(env, hidden=64):
    """Build a one-hidden-layer ReLU network with one output per action.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width
            and ``env.action_space.n`` the output width.
        hidden: Width of the hidden layer.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU, Linear)``.
    """
    layers = [nn.Flatten()]
    layers.append(nn.Linear(env.state_space.shape[0], hidden))
    layers.append(nn.ReLU())
    layers.append(nn.Linear(hidden, env.action_space.n))
    return nn.Sequential(*layers)
Beispiel #9
0
def fc_relu_dist_q(env, hidden=64, atoms=51):
    """Build a one-hidden-layer network with ``atoms`` outputs per action.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width
            and ``env.action_space.n`` the number of actions.
        hidden: Width of the hidden layer.
        atoms: Number of outputs produced for each action.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU, Linear0)`` whose last layer
        has ``env.action_space.n * atoms`` outputs.
    """
    trunk = [
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden),
        nn.ReLU(),
    ]
    output_layer = nn.Linear0(hidden, env.action_space.n * atoms)
    return nn.Sequential(*trunk, output_layer)
Beispiel #10
0
def lunar_lander_nature_ddqn(env, hidden=64):
    """Build a two-hidden-layer ReLU network with one output per action.

    Args:
        env: Environment; ``env.observation_space.shape[0]`` sets the input
            width and ``env.action_space.n`` the output width.
        hidden: Width of both hidden layers. Defaults to 64, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        ``nn.Sequential(Linear, ReLU, Linear, ReLU, Linear)``.
    """
    return nn.Sequential(
        nn.Linear(env.observation_space.shape[0], hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, env.action_space.n),
    )
Beispiel #11
0
def features(state_space_size, hidden1=400):
    """Build a single Linear + ReLU feature layer.

    Args:
        state_space_size: Size of the state vector; the layer takes
            ``state_space_size + 1`` inputs.
        hidden1: Output width of the linear layer.

    Returns:
        The ``nn.Sequential`` after ``init_weights`` has been applied to it
        and it has been cast with ``.float()``.
    """
    input_width = state_space_size + 1
    linear = nn.Linear(input_width, hidden1)
    activation = nn.ReLU()
    net = nn.Sequential(linear, activation)
    # init_weights is defined elsewhere in the file.
    net.apply(init_weights)
    net.float()
    return net
Beispiel #12
0
def fc_v(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with a single output.

    Args:
        env: Environment; the input width is
            ``env.state_space.shape[0] + 1``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential(Linear, ReLU, Linear, ReLU, Linear0)``.
    """
    widths = [env.state_space.shape[0] + 1, hidden1, hidden2]
    layers = []
    # Hidden layers are created front to back, each followed by a ReLU.
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
    layers.append(nn.Linear0(hidden2, 1))
    return nn.Sequential(*layers)
Beispiel #13
0
def fc_soft_policy(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with ``2 * action_dim`` outputs.

    Args:
        env: Environment; the input width is
            ``env.state_space.shape[0] + 1`` and the output width is
            ``env.action_space.shape[0] * 2``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential(Linear, ReLU, Linear, ReLU, Linear0)``.
    """
    first_hidden = nn.Linear(env.state_space.shape[0] + 1, hidden1)
    first_act = nn.ReLU()
    second_hidden = nn.Linear(hidden1, hidden2)
    second_act = nn.ReLU()
    # Two outputs per action dimension (presumably distribution
    # parameters -- confirm with the consumer of this model).
    head = nn.Linear0(hidden2, env.action_space.shape[0] * 2)
    return nn.Sequential(first_hidden, first_act, second_hidden, second_act, head)
Beispiel #14
0
 def __init__(self, env):
     """Create the linear layer and the transposed-convolution stack.

     Args:
         env: Environment; only ``env.action_space.n`` is read.
     """
     super().__init__()
     n_actions = env.action_space.n
     self.num_actions = n_actions
     # 3136 == 64 * 7 * 7 (presumably reshaped to 64x7x7 before the
     # deconvolutions -- the reshape is not visible here).
     self.fc = nn.Linear(512, 3136)
     deconv_layers = [
         nn.ConvTranspose2d(64, 64, 3, stride=1),
         nn.ReLU(),
         nn.ConvTranspose2d(64, 32, 4, stride=2),
         nn.ReLU(),
         # FRAMES output channels for every action.
         nn.ConvTranspose2d(32, FRAMES * n_actions, 8, stride=4),
         nn.Sigmoid(),
         nn.Scale(255),
     ]
     self.deconv = nn.Sequential(*deconv_layers)
Beispiel #15
0
def fc_policy(env):
    """Build a two-hidden-layer (64, 64) network with ``2 * action_dim`` outputs.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width
            and ``env.action_space.shape[0] * 2`` the output width.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU, Linear, ReLU, Linear0)``.
    """
    layers = [nn.Flatten()]
    widths = (env.state_space.shape[0], 64, 64)
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    layers.append(nn.Linear0(64, env.action_space.shape[0] * 2))
    return nn.Sequential(*layers)
Beispiel #16
0
def fc_v(env, hidden1=516, hidden2=516):
    """Build a two-hidden-layer ReLU network with a single output.

    Prints ``Custom V loaded`` to stdout each time it is called.

    Args:
        env: Environment; the input width is
            ``env.state_space.shape[0] + 1``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential(Linear, ReLU, Linear, ReLU, Linear0)``.
    """
    print("Custom V loaded")
    input_width = env.state_space.shape[0] + 1
    body = [
        nn.Linear(input_width, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
    ]
    output_layer = nn.Linear0(hidden2, 1)
    return nn.Sequential(*body, output_layer)
Beispiel #17
0
def nature_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Number of input channels of the first convolution.

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten,
        then Linear(3136, 512) and ReLU.
    """
    layers = [nn.Scale(1 / 255)]
    conv_specs = ((frames, 32, 8, 4), (32, 64, 4, 2), (64, 64, 3, 1))
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    # 3136 == 64 * 7 * 7: flattened conv output for 84x84 inputs
    # (input resolution assumed -- confirm with caller).
    layers.extend([nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()])
    return nn.Sequential(*layers)
Beispiel #18
0
def shared_feature_layers():
    """Build a convolutional feature extractor producing 512 features.

    The number of input channels comes from the module-level ``FRAMES``
    constant, which is defined outside this function.

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten,
        then Linear(3136, 512) and ReLU.
    """
    scale = nn.Scale(1 / 255)
    conv_stack = [
        nn.Conv2d(FRAMES, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # 3136 == 64 * 7 * 7: flattened conv output for 84x84 inputs
    # (input resolution assumed -- confirm with caller).
    dense = [nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()]
    return nn.Sequential(scale, *conv_stack, *dense)
Beispiel #19
0
def fc_relu_rainbow(env, hidden=64, atoms=51, sigma=0.5):
    """Build a fully connected network with a noisy categorical dueling head.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width
            and ``env.action_space.n`` the number of actions.
        hidden: Width of the shared hidden layer.
        atoms: Output size of the first stream; the second stream emits
            ``env.action_space.n * atoms`` outputs.
        sigma: Forwarded as ``sigma_init`` to both noisy layers.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU, CategoricalDueling)``.
    """
    trunk = [
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden),
        nn.ReLU(),
    ]
    # Only the second stream passes init_scale; the first keeps the
    # layer's default.
    first_stream = nn.NoisyFactorizedLinear(hidden, atoms, sigma_init=sigma)
    second_stream = nn.NoisyFactorizedLinear(
        hidden,
        env.action_space.n * atoms,
        init_scale=0.0,
        sigma_init=sigma,
    )
    head = nn.CategoricalDueling(first_stream, second_stream)
    return nn.Sequential(*trunk, head)
Beispiel #20
0
def conv_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Frame count; the first convolution takes ``frames * 3``
            input channels (three channels per frame).

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten,
        then Linear(10816, 512) and ReLU.
    """
    layers = [nn.Scale(1 / 255)]
    conv_specs = ((frames * 3, 64, 4, 2), (64, 64, 3, 2), (64, 64, 3, 1))
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.append(nn.Flatten())
    # 10816 == 64 * 13 * 13; consistent with e.g. 64x64 inputs
    # (input resolution assumed -- confirm with caller).
    layers.append(nn.Linear(10816, 512))
    layers.append(nn.ReLU())
    return nn.Sequential(*layers)
Beispiel #21
0
def nature_c51(env, frames=4, atoms=51):
    """Build a convolutional network with ``atoms`` outputs per action.

    Args:
        env: Environment; only ``env.action_space.n`` is read.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs produced for each action.

    Returns:
        A flat ``nn.Sequential``: scale, three conv/ReLU pairs, flatten,
        Linear(3136, 512), ReLU and a final ``Linear0`` with
        ``env.action_space.n * atoms`` outputs.
    """
    layers = [nn.Scale(1 / 255)]
    for in_channels, out_channels, kernel, stride in (
        (frames, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    ):
        layers += [
            nn.Conv2d(in_channels, out_channels, kernel, stride=stride),
            nn.ReLU(),
        ]
    # 3136 == 64 * 7 * 7: flattened conv output for 84x84 inputs
    # (input resolution assumed -- confirm with caller).
    layers += [nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()]
    layers.append(nn.Linear0(512, env.action_space.n * atoms))
    return nn.Sequential(*layers)
Beispiel #22
0
 def __init__(self, env, frames=4):
     """Create the conv stack plus the hidden and output linear layers.

     Args:
         env: Multi-agent environment; ``env.agents`` and
             ``env.action_spaces['first_0']`` are read.
         frames: Number of input channels of the first convolution.
     """
     super().__init__()
     n_agents = len(env.agents)
     # The action count is taken from the agent keyed 'first_0'.
     n_actions = env.action_spaces['first_0'].n

     conv_layers = [nn.Scale(1 / 255)]
     for in_channels, out_channels, kernel, stride in (
         (frames, 32, 8, 4),
         (32, 64, 4, 2),
         (64, 64, 3, 1),
     ):
         conv_layers.append(
             nn.Conv2d(in_channels, out_channels, kernel, stride=stride)
         )
         conv_layers.append(nn.ReLU())
     conv_layers.append(nn.Flatten())
     self.conv = nn.Sequential(*conv_layers)

     # Both linear layers take n_agents extra inputs (presumably an agent
     # indicator concatenated in forward() -- not visible here, confirm).
     self.hidden = nn.Linear(3136 + n_agents, 512)
     self.output = nn.Linear0(512 + n_agents, n_actions)
Beispiel #23
0
def fc_v(env, hidden1=64, hidden2=64):
    """Build a two-hidden-layer ReLU network with a single output.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width.
        hidden1: Width of the first hidden layer. Defaults to 64, the value
            that was previously hard-coded.
        hidden2: Width of the second hidden layer. Defaults to 64, the value
            that was previously hard-coded.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU, Linear, ReLU, Linear)``.
    """
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    )
Beispiel #24
0
def fc_relu_features(env, hidden=64):
    """Build a Flatten -> Linear -> ReLU feature layer.

    Args:
        env: Environment; ``env.state_space.shape[0]`` sets the input width.
        hidden: Output width of the linear layer.

    Returns:
        ``nn.Sequential(Flatten, Linear, ReLU)``.
    """
    flatten = nn.Flatten()
    projection = nn.Linear(env.state_space.shape[0], hidden)
    activation = nn.ReLU()
    return nn.Sequential(flatten, projection, activation)
Beispiel #25
0
def simple_nature_features(frames=4):
    """Build a tiny Scale -> Linear(1, 16) -> ReLU feature extractor.

    Args:
        frames: Not used by the body; kept so the signature stays
            compatible with existing callers.

    Returns:
        ``nn.Sequential(Scale(1/4), Linear(1, 16), ReLU)``.
    """
    layers = [
        nn.Scale(1 / 4),
        nn.Linear(1, 16),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)