def dueling_conv_net(env, frames=4):
    """Build a convolutional network that ends in a ``Dueling`` head.

    The network is three Conv2d/ReLU pairs, a ``Flatten`` layer, and a
    ``Dueling`` module wrapping two small fully connected streams. The
    first stream ends in a single output; the second ends in
    ``env.action_space.n`` outputs.

    Args:
        env: Object exposing ``action_space.n``, used as the output width
            of the second stream.
        frames: Number of input channels for the first convolution.

    Returns:
        An ``nn.Sequential`` containing the layers described above.
    """
    # Both streams expect exactly this many flattened features, so the input
    # resolution is effectively fixed by this constant.
    # NOTE(review): 3456 == 64 channels * 54 positions -- confirm the
    # expected input image size against the callers.
    flat_features = 3456
    hidden_units = 512

    # Modules are created in the same order as they appear in the final
    # network so that parameter initialization order is unchanged.
    feature_layers = [
        nn.Conv2d(frames, 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
        Flatten(),
    ]

    # Presumably the state-value stream of a dueling architecture --
    # verify against the Dueling module's argument order.
    scalar_stream = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, 1),
    )

    # Presumably the per-action advantage stream -- verify as above.
    action_stream = nn.Sequential(
        nn.Linear(flat_features, hidden_units),
        nn.ReLU(),
        Linear0(hidden_units, env.action_space.n),
    )

    return nn.Sequential(*feature_layers, Dueling(scalar_stream, action_stream))
def policy_net(env):
    """Build a two-layer fully connected head with one output per action.

    Args:
        env: Object exposing ``action_space.n``, used as the output width.

    Returns:
        An ``nn.Sequential`` mapping 3456 input features through a
        512-unit ReLU layer to ``env.action_space.n`` outputs.
    """
    # The input width is hard-coded; callers must supply 3456 features.
    layers = (
        nn.Linear(3456, 512),
        nn.ReLU(),
        Linear0(512, env.action_space.n),
    )
    return nn.Sequential(*layers)
 def test_linear0(self):
     """Check that a freshly built ``Linear0(3, 3)`` maps an input to zeros."""
     layer = Linear0(3, 3)
     inputs = torch.tensor([[3., -2., 10]])
     expected = torch.tensor([[0., 0., 0.]])
     # A non-zero input is used so a zero output cannot come from the input.
     tt.assert_equal(layer(inputs), expected)
def value_net():
    """Build a two-layer fully connected head with a single output.

    Returns:
        An ``nn.Sequential`` mapping 3456 input features through a
        512-unit ReLU layer to one output.
    """
    # The input width is hard-coded; callers must supply 3456 features.
    layers = (
        nn.Linear(3456, 512),
        nn.ReLU(),
        Linear0(512, 1),
    )
    return nn.Sequential(*layers)