def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5):
    """Assemble a convolutional network ending in a categorical dueling head.

    The returned ``nn.Sequential`` applies ``nn.Scale(1/255)``, three
    Conv2d/ReLU pairs and a flatten, then ``nn.CategoricalDueling`` wrapping
    two branches built from ``nn.NoisyFactorizedLinear`` layers.

    Args:
        env: Object exposing ``action_space.n``; sizes the per-action branch.
        frames: Number of input channels of the first convolution.
        hidden: Width of the hidden layer in both branches.
        atoms: Output width of the first branch; the second branch outputs
            ``env.action_space.n * atoms`` values.
        sigma: Forwarded as ``sigma_init`` to every noisy linear layer.

    Returns:
        The assembled ``nn.Sequential`` module.
    """
    # 3136 == 64 * 7 * 7: what this conv stack yields for 84x84 inputs,
    # assuming nn.Conv2d follows torch's unpadded shape rule -- TODO confirm
    # the expected input resolution.
    flat_width = 3136

    trunk = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Branch with `atoms` outputs (presumably the state-value stream of the
    # dueling head -- confirm against nn.CategoricalDueling).
    per_state_branch = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_width, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(hidden, atoms, init_scale=0, sigma_init=sigma),
    )

    # Branch with `atoms` outputs per action (presumably the advantage stream).
    per_action_width = env.action_space.n * atoms
    per_action_branch = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_width, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(
            hidden, per_action_width, init_scale=0, sigma_init=sigma
        ),
    )

    head = nn.CategoricalDueling(per_state_branch, per_action_branch)
    return nn.Sequential(*trunk, head)
def __init__(self, env):
    """Create the fully connected layer and the transposed-convolution stack.

    Args:
        env: Object exposing ``action_space.n``; the value is stored on
            ``self.num_actions`` and sizes the final layer's channel count.

    Note:
        Reads the module-level ``FRAMES`` constant.
    """
    super().__init__()

    action_count = env.action_space.n
    self.num_actions = action_count

    # Maps a 512-wide vector to 3136 values (3136 == 64 * 7 * 7; presumably
    # reshaped to 64 channels of 7x7 before `deconv` -- verify in forward()).
    self.fc = nn.Linear(512, 3136)

    upsampling_layers = [
        nn.ConvTranspose2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.ConvTranspose2d(64, 32, 4, stride=2),
        nn.ReLU(),
        # Emits FRAMES channels for every action.
        nn.ConvTranspose2d(32, FRAMES * action_count, 8, stride=4),
        nn.Sigmoid(),
        # Sigmoid output is multiplied back up by nn.Scale(255); presumably
        # this restores the 0-255 pixel range -- confirm nn.Scale semantics.
        nn.Scale(255),
    ]
    self.deconv = nn.Sequential(*upsampling_layers)
def nature_ddqn(env, frames=4):
    """Assemble a convolutional network ending in an ``nn.Dueling`` head.

    Args:
        env: Object exposing ``action_space.n``; sizes the wider branch.
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` of the scaling layer, three Conv2d/ReLU pairs,
        a flatten, and the dueling head.
    """
    layers = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Single-output branch (presumably the state value -- confirm against
    # nn.Dueling's argument order).
    scalar_branch = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )
    # One output per action (presumably the advantages).
    action_branch = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )

    layers.append(nn.Dueling(scalar_branch, action_branch))
    return nn.Sequential(*layers)
def nature_features(frames=4):
    """Build a convolutional feature extractor with a 512-wide output layer.

    Args:
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` of ``nn.Scale(1/255)``, three Conv2d/ReLU pairs,
        a flatten, and ``Linear(3136, 512)`` followed by ReLU.
    """
    # (in_channels, out_channels, kernel_size, stride) for each convolution.
    conv_table = (
        (frames, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    )

    modules = [nn.Scale(1 / 255)]
    for in_ch, out_ch, kernel, step in conv_table:
        modules.append(nn.Conv2d(in_ch, out_ch, kernel, stride=step))
        modules.append(nn.ReLU())

    # 3136 == 64 * 7 * 7; matches 84x84 inputs under torch's unpadded conv
    # shape rule -- TODO confirm the expected input resolution.
    modules.append(nn.Flatten())
    modules.append(nn.Linear(3136, 512))
    modules.append(nn.ReLU())
    return nn.Sequential(*modules)
def shared_feature_layers():
    """Build a convolutional feature extractor with a 512-wide output layer.

    The first convolution takes its input channel count from the
    module-level ``FRAMES`` constant.

    Returns:
        An ``nn.Sequential`` of ``nn.Scale(1/255)``, three Conv2d/ReLU pairs,
        a flatten, and ``Linear(3136, 512)`` followed by ReLU.
    """
    stack = [nn.Scale(1 / 255)]
    stack.extend([nn.Conv2d(FRAMES, 32, 8, stride=4), nn.ReLU()])
    stack.extend([nn.Conv2d(32, 64, 4, stride=2), nn.ReLU()])
    stack.extend([nn.Conv2d(64, 64, 3, stride=1), nn.ReLU()])
    # 3136 == 64 * 7 * 7; matches 84x84 inputs under torch's unpadded conv
    # shape rule -- TODO confirm the expected input resolution.
    stack.extend([nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()])
    return nn.Sequential(*stack)
def conv_features(frames=4):
    """Build a convolutional feature extractor with a 512-wide output layer.

    Args:
        frames: The first convolution takes ``frames * 3`` input channels
            (presumably three colour channels per frame -- confirm with the
            caller).

    Returns:
        An ``nn.Sequential`` of ``nn.Scale(1/255)``, three Conv2d/ReLU pairs,
        a flatten, and ``Linear(10816, 512)`` followed by ReLU.
    """
    in_channels = frames * 3
    # (in_channels, out_channels, kernel_size, stride) for each convolution.
    conv_table = (
        (in_channels, 64, 4, 2),
        (64, 64, 3, 2),
        (64, 64, 3, 1),
    )

    modules = [nn.Scale(1 / 255)]
    for src, dst, kernel, step in conv_table:
        modules.append(nn.Conv2d(src, dst, kernel, stride=step))
        modules.append(nn.ReLU())

    # 10816 == 64 * 13 * 13; consistent with e.g. 64x64 inputs under torch's
    # unpadded conv shape rule -- TODO confirm the expected input resolution.
    modules.append(nn.Flatten())
    modules.append(nn.Linear(10816, 512))
    modules.append(nn.ReLU())
    return nn.Sequential(*modules)
def nature_c51(env, frames=4, atoms=51):
    """Assemble a convolutional network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``action_space.n``; sizes the final layer.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs produced for each action.

    Returns:
        An ``nn.Sequential`` of ``nn.Scale(1/255)``, three Conv2d/ReLU pairs,
        a flatten, ``Linear(3136, 512)`` with ReLU, and a final
        ``nn.Linear0`` of width ``env.action_space.n * atoms``.
    """
    body = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        # 3136 == 64 * 7 * 7; matches 84x84 inputs under torch's unpadded
        # conv shape rule -- TODO confirm the expected input resolution.
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]

    output_width = env.action_space.n * atoms
    body.append(nn.Linear0(512, output_width))
    return nn.Sequential(*body)
def __init__(self, env, frames=4):
    """Create the conv stack and two linear layers widened by the agent count.

    Args:
        env: Object exposing ``agents`` (its length is used) and
            ``action_spaces``; the action count is read from the
            ``'first_0'`` entry only.
        frames: Number of input channels of the first convolution.
    """
    super().__init__()

    agent_count = len(env.agents)
    action_count = env.action_spaces['first_0'].n

    feature_layers = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    self.conv = nn.Sequential(*feature_layers)

    # Both linear layers take `agent_count` extra inputs; presumably an
    # agent-indicator vector is concatenated in forward() -- confirm there.
    hidden_in = 3136 + agent_count
    output_in = 512 + agent_count
    self.hidden = nn.Linear(hidden_in, 512)
    self.output = nn.Linear0(output_in, action_count)
def simple_nature_features(frames=4):
    """Build a minimal three-module network: scale, linear, ReLU.

    Args:
        frames: Accepted for signature compatibility; not used by this body.

    Returns:
        An ``nn.Sequential`` of ``nn.Scale(1/4)``, ``Linear(1, 16)`` and ReLU.
    """
    pieces = (
        nn.Scale(1 / 4),
        nn.Linear(1, 16),
        nn.ReLU(),
    )
    return nn.Sequential(*pieces)