def fc_net(env, frames=1):
    """Build a fully connected network with one output per action.

    Args:
        env: Environment object exposing ``state_space.shape`` and
            ``action_space.n``.
        frames: Multiplier applied to ``env.state_space.shape[0]`` to size
            the input layer (defaults to 1).

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear(in, 256) -> Tanh ->
        Linear(256, env.action_space.n).
    """
    hidden_units = 256
    layers = [
        # Flatten is a project layer; presumably collapses non-batch dims.
        Flatten(),
        nn.Linear(env.state_space.shape[0] * frames, hidden_units),
        nn.Tanh(),
        nn.Linear(hidden_units, env.action_space.n),
    ]
    return nn.Sequential(*layers)
def conv_features(frames=4):
    """Build a three-layer convolutional feature extractor.

    The number of input channels used to be hard-coded to 4; it is now a
    parameter (defaulting to 4, so existing callers are unaffected), which
    mirrors the ``frames`` argument of ``dueling_conv_net``.

    Args:
        frames: Number of input channels of the first convolution
            (defaults to 4).

    Returns:
        An ``nn.Sequential`` of Conv2d(frames, 32, 8, stride=4) -> ReLU ->
        Conv2d(32, 64, 4, stride=2) -> ReLU -> Conv2d(64, 64, 3, stride=1)
        -> ReLU -> Flatten.
    """
    return nn.Sequential(
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        # Flatten is a project layer; presumably collapses non-batch dims.
        Flatten(),
    )
def dueling_fc_net(env, sigma_init):
    """Build a fully connected network with a ``Dueling`` head.

    Two sub-networks read the flattened input: one ending in a single
    output and one ending in a ``NoisyLinear`` layer with one output per
    action. Both are handed to the project's ``Dueling`` module
    (presumably the value and advantage streams of a dueling architecture
    -- confirm against ``Dueling``).

    Args:
        env: Environment object exposing ``state_space.shape`` and
            ``action_space.n``.
        sigma_init: Forwarded to ``NoisyLinear`` as ``sigma_init``.

    Returns:
        An ``nn.Sequential`` of Flatten -> Dueling(single-output stream,
        per-action stream).
    """
    hidden_units = 256

    # Modules are created in the same order as before (Flatten, first
    # stream, second stream) so random initialisation is unaffected.
    flatten = Flatten()
    single_output_stream = nn.Sequential(
        nn.Linear(env.state_space.shape[0], hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(env.state_space.shape[0], hidden_units),
        nn.ReLU(),
        NoisyLinear(hidden_units, env.action_space.n, sigma_init=sigma_init),
    )
    return nn.Sequential(
        flatten,
        Dueling(single_output_stream, per_action_stream),
    )
def dueling_conv_net(env, frames=4):
    """Build a convolutional network with a ``Dueling`` head.

    Three Conv2d/ReLU stages are followed by Flatten and by two
    fully connected sub-networks (one with a single output, one with an
    output per action) that are passed to the project's ``Dueling``
    module (presumably value and advantage streams -- confirm against
    ``Dueling``).

    Args:
        env: Environment object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution
            (defaults to 4).

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # Size of the flattened convolutional output expected by both streams.
    # NOTE(review): 3456 == 64 * 9 * 6, which this conv stack produces for
    # e.g. a 105x80 input -- confirm against the environment preprocessing.
    flat_features = 3456
    hidden_units = 512

    conv_stack = [
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        Flatten(),
    ]
    single_output_stream = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, env.action_space.n),
    )
    return nn.Sequential(
        *conv_stack,
        Dueling(single_output_stream, per_action_stream),
    )
def fc_policy(env):
    """Build a fully connected network with one output per action.

    Args:
        env: Environment object exposing ``state_space.shape`` and
            ``action_space.n``.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear(in, 256) -> ReLU ->
        Linear(256, env.action_space.n).
    """
    hidden_units = 256
    modules = (
        Flatten(),
        nn.Linear(env.state_space.shape[0], hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, env.action_space.n),
    )
    return nn.Sequential(*modules)
def fc_value(env):
    """Build a fully connected network with a single scalar output.

    Args:
        env: Environment object exposing ``state_space.shape``.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear(in, 256) -> ReLU ->
        Linear(256, 1).
    """
    hidden_units = 256
    model = nn.Sequential(
        Flatten(),
        nn.Linear(env.state_space.shape[0], hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 1),
    )
    return model
def fc_features(env):
    """Build a single-hidden-layer fully connected feature extractor.

    Args:
        env: Environment object exposing ``state_space.shape``.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear(in, 256) -> ReLU.
    """
    feature_units = 256
    stages = [
        Flatten(),
        nn.Linear(env.state_space.shape[0], feature_units),
        nn.ReLU(),
    ]
    return nn.Sequential(*stages)