예제 #1
0
def fc_net(env, frames=1):
    """Build a small fully connected network sized from ``env``.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        frames: Multiplier applied to ``state_space.shape[0]`` to obtain the
            width of the first linear layer (presumably the number of
            stacked observations -- confirm against the caller).

    Returns:
        An ``nn.Sequential`` of ``Flatten``, ``Linear(in, 256)``, ``Tanh``
        and ``Linear(256, env.action_space.n)``.
    """
    input_width = env.state_space.shape[0] * frames
    output_width = env.action_space.n
    hidden_width = 256

    layers = [
        Flatten(),
        nn.Linear(input_width, hidden_width),
        nn.Tanh(),
        nn.Linear(hidden_width, output_width),
    ]
    return nn.Sequential(*layers)
def conv_features(frames=4):
    """Build a three-layer convolutional feature extractor.

    Args:
        frames: Number of input channels of the first convolution.
            Defaults to 4, the value that used to be hard-coded, so
            existing zero-argument calls behave exactly as before.

    Returns:
        An ``nn.Sequential`` of three ``Conv2d``/``ReLU`` pairs followed by
        ``Flatten``. The convolutions use 32, 64 and 64 output channels
        with kernel sizes 8, 4, 3 and strides 4, 2, 1 respectively.
    """
    return nn.Sequential(
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        # Flatten is defined elsewhere; presumably it collapses every
        # non-batch dimension so the result can feed linear layers.
        Flatten(),
    )
예제 #3
0
def dueling_fc_net(env, sigma_init):
    """Build a fully connected two-stream network wrapped in ``Dueling``.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        sigma_init: Forwarded as ``sigma_init`` to the ``NoisyLinear``
            output layer of the second stream.

    Returns:
        ``nn.Sequential(Flatten(), Dueling(first, second))`` where both
        streams are ``Linear(in, 256) -> ReLU -> output layer``. The first
        stream ends in ``Linear(256, 1)``; the second ends in
        ``NoisyLinear(256, env.action_space.n)``.
    """
    in_features = env.state_space.shape[0]
    hidden = 256

    # Streams are created scalar-first so parameters are always
    # initialised in the same sequence.
    # NOTE(review): presumably the scalar stream is the state value and the
    # per-action stream the advantages -- confirm against Dueling.
    scalar_stream = nn.Sequential(
        nn.Linear(in_features, hidden),
        nn.ReLU(),
        nn.Linear(hidden, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(in_features, hidden),
        nn.ReLU(),
        NoisyLinear(hidden, env.action_space.n, sigma_init=sigma_init),
    )
    return nn.Sequential(Flatten(), Dueling(scalar_stream, per_action_stream))
def dueling_conv_net(env, frames=4):
    """Build a convolutional two-stream network wrapped in ``Dueling``.

    Args:
        env: Object exposing ``action_space.n``; only that attribute is read.
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` of three ``Conv2d``/``ReLU`` pairs, ``Flatten``
        and a ``Dueling`` module holding two
        ``Linear(3456, 512) -> ReLU -> Linear0`` streams with 1 and
        ``env.action_space.n`` outputs respectively.
    """
    # Input width expected by both streams. 3456 = 64 channels * 54, so the
    # accepted input resolution is fixed by this constant.
    # NOTE(review): a 105x80 input would give a 9x6 map here -- confirm
    # against the preprocessing actually used.
    flat_features = 3456
    hidden = 512

    trunk = [
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        Flatten(),
    ]

    # Created after the trunk, scalar stream first, so parameters are
    # always initialised in the same sequence.
    scalar_stream = nn.Sequential(
        nn.Linear(flat_features, hidden),
        nn.ReLU(),
        Linear0(hidden, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(flat_features, hidden),
        nn.ReLU(),
        Linear0(hidden, env.action_space.n),
    )
    return nn.Sequential(*trunk, Dueling(scalar_stream, per_action_stream))
예제 #5
0
def fc_policy(env):
    """Build a fully connected network with ``env.action_space.n`` outputs.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.

    Returns:
        ``nn.Sequential`` of ``Flatten``, ``Linear(in, 256)``, ``ReLU`` and
        ``Linear(256, env.action_space.n)``, where ``in`` is
        ``env.state_space.shape[0]``.
    """
    n_inputs = env.state_space.shape[0]
    n_outputs = env.action_space.n
    layers = (
        Flatten(),
        nn.Linear(n_inputs, 256),
        nn.ReLU(),
        nn.Linear(256, n_outputs),
    )
    return nn.Sequential(*layers)
예제 #6
0
def fc_value(env):
    """Build a fully connected network with a single scalar output.

    Args:
        env: Object exposing ``state_space.shape``; its first entry sets the
            input width.

    Returns:
        ``nn.Sequential`` of ``Flatten``, ``Linear(in, 256)``, ``ReLU`` and
        ``Linear(256, 1)``.
    """
    n_inputs = env.state_space.shape[0]
    hidden = nn.Linear(n_inputs, 256)
    head = nn.Linear(256, 1)
    return nn.Sequential(Flatten(), hidden, nn.ReLU(), head)
예제 #7
0
def fc_features(env):
    """Build a one-hidden-layer feature network with 256 outputs.

    Args:
        env: Object exposing ``state_space.shape``; its first entry sets the
            input width.

    Returns:
        ``nn.Sequential`` of ``Flatten``, ``Linear(in, 256)`` and ``ReLU``.
    """
    n_inputs = env.state_space.shape[0]
    layers = [Flatten(), nn.Linear(n_inputs, 256), nn.ReLU()]
    return nn.Sequential(*layers)