def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5):
    """Assemble a convolutional network ending in a categorical dueling head.

    The stack is: input scaling, three Conv2d/ReLU stages, a flatten, and an
    ``nn.CategoricalDueling`` module fed by two noisy fully connected streams.

    Args:
        env: Object exposing ``action_space.n`` (number of discrete actions).
        frames: Number of input channels given to the first convolution.
        hidden: Width of the hidden layer in each stream.
        atoms: Number of outputs per action (and of the first stream).
        sigma: Value forwarded as ``sigma_init`` to every noisy layer.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # NOTE(review): 3136 == 64 * 7 * 7, which presumably corresponds to
    # 84x84 input frames -- confirm against the environment wrapper.
    flat_size = 3136

    # Modules are created in network order so that any random weight
    # initialisation consumes the RNG in the same sequence as before.
    trunk = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # First stream: ``atoms`` outputs (presumably the state-value distribution).
    first_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_size, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(hidden, atoms, init_scale=0, sigma_init=sigma),
    )

    # Second stream: ``atoms`` outputs per action (presumably the advantages).
    second_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_size, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(
            hidden, env.action_space.n * atoms, init_scale=0, sigma_init=sigma
        ),
    )

    head = nn.CategoricalDueling(first_stream, second_stream)
    return nn.Sequential(*trunk, head)
def dueling_fc_relu_q(env):
    """Build a fully connected dueling network for vector observations.

    The input is flattened and passed to ``nn.Dueling``, which wraps two
    256-unit ReLU streams: one with a single output and one with an output
    per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    state_dim = env.state_space.shape[0]
    width = 256

    # Single-output stream is created first, matching the original order.
    scalar_stream = nn.Sequential(
        nn.Linear(state_dim, width),
        nn.ReLU(),
        nn.Linear(width, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(state_dim, width),
        nn.ReLU(),
        nn.Linear(width, env.action_space.n),
    )

    return nn.Sequential(
        nn.Flatten(),
        nn.Dueling(scalar_stream, per_action_stream),
    )
def nature_ddqn(env, frames=4):
    """Build a convolutional dueling Q-network.

    Three Conv2d/ReLU stages feed a flatten and an ``nn.Dueling`` head made
    of two 512-unit streams whose final layers are ``nn.Linear0``.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels given to the first convolution.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # NOTE(review): 3136 == 64 * 7 * 7; presumably assumes 84x84 inputs.
    flat_size = 3136

    conv_layers = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Streams are built after the trunk, single-output stream first, so the
    # module construction order is unchanged.
    scalar_stream = nn.Sequential(
        nn.Linear(flat_size, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(flat_size, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )

    return nn.Sequential(*conv_layers, nn.Dueling(scalar_stream, per_action_stream))
def fc_actor_critic(env, hidden1=400, hidden2=300):
    """Create the three networks of a fully connected actor-critic model.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Output width of the shared feature layer.
        hidden2: Hidden width of the value and policy heads.

    Returns:
        A ``(features, v, policy)`` tuple of ``nn.Sequential`` modules.
        ``features`` maps ``state_dim + 1`` inputs to ``hidden1`` units,
        ``v`` maps those to one output, and ``policy`` maps them to
        ``2 * action_dim`` outputs.
    """
    # NOTE(review): the extra input unit is presumably a time/step feature
    # appended to the observation -- confirm with the caller.
    input_dim = env.state_space.shape[0] + 1
    policy_outputs = env.action_space.shape[0] * 2

    shared = nn.Sequential(
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
    )
    value_head = nn.Sequential(
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    )
    policy_head = nn.Sequential(
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, policy_outputs),
    )
    return shared, value_head, policy_head
def __init__(self, env, hidden1=400, hidden2=300):
    """Set up a Tanh MLP plus a learnable per-action ``log_stds`` vector.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """
    super().__init__()

    # NOTE(review): the "+ 1" presumably accounts for an extra feature
    # appended to the observation -- confirm with the caller.
    input_dim = env.state_space.shape[0] + 1
    action_dim = env.action_space.shape[0]

    mlp_layers = [
        nn.Linear(input_dim, hidden1),
        nn.Tanh(),
        nn.Linear(hidden1, hidden2),
        nn.Tanh(),
        nn.Linear(hidden2, action_dim),
    ]
    self.model = nn.Sequential(*mlp_layers)

    # One zero-initialised trainable entry per action dimension.
    self.log_stds = nn.Parameter(torch.zeros(action_dim))
def setUp(self):
    """Build a seeded single-layer model, optimizer, action space and policy."""
    # Fixed seed so any random initialisation is reproducible across runs.
    torch.manual_seed(2)

    self.model = nn.Sequential(nn.Linear0(STATE_DIM, ACTION_DIM))
    self.optimizer = torch.optim.RMSprop(self.model.parameters(), lr=0.01)

    # Three-dimensional box with bounds -1 and 1 on every axis.
    lower = np.array([-1, -1, -1])
    upper = np.array([1, 1, 1])
    self.space = Box(lower, upper)

    # NOTE(review): the meaning of the trailing 0.5 is defined by
    # DeterministicPolicy, which is not visible here.
    self.policy = DeterministicPolicy(
        self.model,
        self.optimizer,
        self.space,
        0.5,
    )
def critic(env, hidden1=400, hidden2=300):
    """Build a three-layer ReLU MLP that maps a state to one scalar output.

    After construction, ``init_weights`` is applied to every submodule and
    the network is cast to float32.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        The initialised ``nn.Sequential`` network.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Linear(state_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    ]
    model = nn.Sequential(*layers)

    # Custom initialisation first, then the dtype cast, as in the original.
    model.apply(init_weights)
    model.float()
    return model
def fc_relu_q(env, hidden=64):
    """Build a one-hidden-layer ReLU network with one output per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear.
    """
    state_dim = env.state_space.shape[0]
    n_actions = env.action_space.n

    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear(hidden, n_actions),
    ]
    return nn.Sequential(*layers)
def fc_relu_dist_q(env, hidden=64, atoms=51):
    """Build a one-hidden-layer network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Number of outputs produced for each action.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> ``nn.Linear0``.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.n * atoms

    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        # NOTE(review): nn.Linear0 is a project layer, presumably a
        # zero-initialised Linear -- confirm in the nn module.
        nn.Linear0(hidden, output_dim),
    ]
    return nn.Sequential(*layers)
def lunar_lander_nature_ddqn(env, hidden=64):
    """Build a two-hidden-layer ReLU Q-network for vector observations.

    The hidden width was previously hard-coded to 64; it is now a keyword
    parameter with the same default, so existing ``f(env)`` calls build an
    identical architecture.

    Args:
        env: Object exposing ``observation_space.shape`` and
            ``action_space.n``.
        hidden: Width of both hidden layers.

    Returns:
        ``nn.Sequential`` of Linear -> ReLU -> Linear -> ReLU -> Linear with
        one output per action.
    """
    obs_dim = env.observation_space.shape[0]
    n_actions = env.action_space.n
    return nn.Sequential(
        nn.Linear(obs_dim, hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, n_actions),
    )
def features(state_space_size, hidden1=400):
    """Build a single Linear + ReLU feature layer.

    After construction, ``init_weights`` is applied to every submodule and
    the network is cast to float32.

    Args:
        state_space_size: Observation size; the layer accepts one more input
            than this.
        hidden1: Number of output features.

    Returns:
        The initialised ``nn.Sequential`` network.
    """
    # NOTE(review): the "+ 1" presumably accounts for an extra feature
    # appended to the observation -- confirm with the caller.
    input_dim = state_space_size + 1

    model = nn.Sequential(
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
    )
    model.apply(init_weights)
    model.float()
    return model
def fc_v(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with a single scalar output.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` ending in ``nn.Linear0(hidden2, 1)``.
    """
    # NOTE(review): the "+ 1" presumably accounts for an extra feature
    # appended to the observation -- confirm with the caller.
    input_dim = env.state_space.shape[0] + 1

    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
def fc_soft_policy(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with two outputs per action dim.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` whose final ``nn.Linear0`` layer has
        ``2 * action_dim`` outputs.
    """
    input_dim = env.state_space.shape[0] + 1
    # NOTE(review): two values per action dimension, presumably a mean and
    # a (log-)scale for a Gaussian policy -- confirm with the consumer.
    output_dim = env.action_space.shape[0] * 2

    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, output_dim),
    ]
    return nn.Sequential(*layers)
def __init__(self, env):
    """Create a linear projection followed by a transposed-conv stack.

    Args:
        env: Object exposing ``action_space.n``.
    """
    super().__init__()

    action_count = env.action_space.n
    self.num_actions = action_count

    # Projects a 512-vector to 3136 values (3136 == 64 * 7 * 7).
    self.fc = nn.Linear(512, 3136)

    # The last transposed convolution emits FRAMES channels per action; the
    # Sigmoid output is then passed through nn.Scale(255).
    upsampling = [
        nn.ConvTranspose2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.ConvTranspose2d(64, 32, 4, stride=2),
        nn.ReLU(),
        nn.ConvTranspose2d(32, FRAMES * action_count, 8, stride=4),
        nn.Sigmoid(),
        nn.Scale(255),
    ]
    self.deconv = nn.Sequential(*upsampling)
def fc_policy(env):
    """Build a 64-64 ReLU network with two outputs per action dimension.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear -> ReLU ->
        ``nn.Linear0`` with ``2 * action_dim`` outputs.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.shape[0] * 2
    width = 64

    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, width),
        nn.ReLU(),
        nn.Linear(width, width),
        nn.ReLU(),
        nn.Linear0(width, output_dim),
    ]
    return nn.Sequential(*layers)
def fc_v(env, hidden1=516, hidden2=516):
    """Build a two-hidden-layer ReLU network with a single scalar output.

    Prints ``Custom V loaded`` to stdout every time it is called.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` ending in ``nn.Linear0(hidden2, 1)``.
    """
    print("Custom V loaded")

    input_dim = env.state_space.shape[0] + 1
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
def nature_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Number of input channels given to the first convolution.

    Returns:
        ``nn.Sequential`` of input scaling, three Conv2d/ReLU stages, a
        flatten, and a 3136 -> 512 Linear/ReLU layer.
    """
    conv_stack = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # NOTE(review): 3136 == 64 * 7 * 7; presumably assumes 84x84 inputs.
    dense_stack = [
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*conv_stack, *dense_stack)
def shared_feature_layers():
    """Build a convolutional feature extractor producing 512 features.

    The number of input channels is taken from the module-level ``FRAMES``.

    Returns:
        ``nn.Sequential`` of input scaling, three Conv2d/ReLU stages, a
        flatten, and a 3136 -> 512 Linear/ReLU layer.
    """
    conv_stack = [
        nn.Scale(1 / 255),
        nn.Conv2d(FRAMES, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # NOTE(review): 3136 == 64 * 7 * 7; presumably assumes 84x84 inputs.
    dense_stack = [
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*conv_stack, *dense_stack)
def fc_relu_rainbow(env, hidden=64, atoms=51, sigma=0.5):
    """Build a fully connected network ending in a categorical dueling head.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the shared hidden layer.
        atoms: Number of outputs per action (and of the first stream).
        sigma: Value forwarded as ``sigma_init`` to both noisy layers.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU ->
        ``nn.CategoricalDueling``.
    """
    state_dim = env.state_space.shape[0]

    # Shared body is created before the noisy heads, as in the original.
    body = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
    ]

    # First stream has ``atoms`` outputs; only the second passes init_scale.
    first_stream = nn.NoisyFactorizedLinear(hidden, atoms, sigma_init=sigma)
    second_stream = nn.NoisyFactorizedLinear(
        hidden,
        env.action_space.n * atoms,
        init_scale=0.0,
        sigma_init=sigma,
    )

    return nn.Sequential(*body, nn.CategoricalDueling(first_stream, second_stream))
def conv_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Frame count; the first convolution takes ``frames * 3``
            input channels.

    Returns:
        ``nn.Sequential`` of input scaling, three Conv2d/ReLU stages, a
        flatten, and a 10816 -> 512 Linear/ReLU layer.
    """
    # NOTE(review): three channels per frame presumably means RGB input.
    input_channels = frames * 3

    conv_stack = [
        nn.Scale(1 / 255),
        nn.Conv2d(input_channels, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # NOTE(review): 10816 == 64 * 13 * 13; the matching input resolution is
    # not visible here -- confirm against the environment.
    dense_stack = [
        nn.Flatten(),
        nn.Linear(10816, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*conv_stack, *dense_stack)
def nature_c51(env, frames=4, atoms=51):
    """Build a convolutional network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels given to the first convolution.
        atoms: Number of outputs produced for each action.

    Returns:
        ``nn.Sequential`` of input scaling, three Conv2d/ReLU stages, a
        flatten, a 3136 -> 512 Linear/ReLU layer and a final ``nn.Linear0``.
    """
    conv_stack = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    # NOTE(review): 3136 == 64 * 7 * 7; presumably assumes 84x84 inputs.
    dense_stack = [
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n * atoms),
    ]
    return nn.Sequential(*conv_stack, *dense_stack)
def __init__(self, env, frames=4):
    """Create the conv trunk and the two agent-augmented linear layers.

    Args:
        env: Object exposing ``agents`` (a sized collection) and
            ``action_spaces`` (a mapping containing the key ``'first_0'``).
        frames: Number of input channels given to the first convolution.
    """
    super().__init__()

    agent_count = len(env.agents)
    # The action count is read from the 'first_0' entry only.
    action_count = env.action_spaces['first_0'].n

    conv_layers = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    self.conv = nn.Sequential(*conv_layers)

    # NOTE(review): both layers take ``agent_count`` extra inputs,
    # presumably a one-hot agent indicator concatenated in forward() --
    # confirm there.
    self.hidden = nn.Linear(3136 + agent_count, 512)
    self.output = nn.Linear0(512 + agent_count, action_count)
def fc_v(env, hidden=64):
    """Build a two-hidden-layer ReLU network with a single scalar output.

    The hidden width was previously hard-coded to 64; it is now a keyword
    parameter with the same default, so existing ``fc_v(env)`` calls build an
    identical architecture.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden: Width of both hidden layers.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear -> ReLU ->
        Linear with one output.
    """
    state_dim = env.state_space.shape[0]
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, 1),
    )
def fc_relu_features(env, hidden=64):
    """Build a flatten + single Linear/ReLU feature layer.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden: Number of output features.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def simple_nature_features(frames=4):
    """Build a tiny feature network: scale by 1/4, then Linear(1, 16) + ReLU.

    Args:
        frames: Not referenced by the body; the network shape does not
            depend on it.

    Returns:
        ``nn.Sequential`` of ``nn.Scale`` -> Linear -> ReLU.
    """
    layers = [
        nn.Scale(1 / 4),
        nn.Linear(1, 16),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)