def dueling_conv_net(env, frames=4):
    """Build a three-layer convolutional network topped with a ``Dueling`` head.

    Args:
        env: Object exposing ``action_space.n``; that value sets the output
            width of the second head passed to ``Dueling``.
        frames: Number of input channels for the first convolution.

    Returns:
        An ``nn.Sequential`` of the convolutional stack, ``Flatten`` and the
        ``Dueling`` module.
    """
    # Both heads expect exactly 3456 flattened features, so the input's
    # spatial size must be one that yields that count after the conv stack.
    flat_features = 3456
    hidden_units = 512

    feature_layers = [
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        Flatten(),
    ]

    # First head: a single output per input row.
    # NOTE(review): presumably the state-value stream -- confirm against Dueling.
    single_output_head = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, 1),
    )

    # Second head: one output per action.
    # NOTE(review): presumably the advantage stream -- confirm against Dueling.
    per_action_head = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, env.action_space.n),
    )

    return nn.Sequential(
        *feature_layers,
        Dueling(single_output_head, per_action_head),
    )
def policy_net(env):
    """Build a two-layer fully connected network with one output per action.

    Args:
        env: Object exposing ``action_space.n``, used as the output width.

    Returns:
        An ``nn.Sequential`` mapping 3456 input features through a 512-unit
        ReLU hidden layer to ``env.action_space.n`` outputs.
    """
    layers = [
        nn.Linear(3456, 512),
        nn.ReLU(),
        Linear0(512, env.action_space.n),
    ]
    return nn.Sequential(*layers)
def test_linear0(self):
    """Check that a freshly built ``Linear0(3, 3)`` outputs all zeros."""
    layer = Linear0(3, 3)
    sample = torch.tensor([[3.0, -2.0, 10.0]])

    output = layer(sample)

    expected = torch.tensor([[0.0, 0.0, 0.0]])
    tt.assert_equal(output, expected)
def value_net():
    """Build a two-layer fully connected network with a single output.

    Returns:
        An ``nn.Sequential`` mapping 3456 input features through a 512-unit
        ReLU hidden layer to one output.
    """
    layers = [
        nn.Linear(3456, 512),
        nn.ReLU(),
        Linear0(512, 1),
    ]
    return nn.Sequential(*layers)