def main():
    """Train a double-DQN agent on CartPole-v0 and plot the returned scores."""
    environment = gym.make('CartPole-v0')
    action_count = environment.action_space.n
    hidden_units = 16

    # Hidden ReLU layer followed by one output unit per discrete action.
    q_layers = [
        nn.Dense(hidden_units, activation='relu'),
        nn.Dense(action_count),
    ]
    q_network = nn.Sequential(q_layers)

    learner = DQN(model=q_network, double=True, env=environment)
    history = learner.train(episodes=300)

    plt.plot(history)
    plt.show()
def model(x, params):
    """Assemble the network graph and its training configuration.

    Args:
        x: Input passed through a flatten layer and three dense layers.
        params: Mapping that must contain ``'learning_rate'``.

    Returns:
        A dict with the raw ``outputs``, the argmax ``predictions`` along
        axis 1, and the ``loss``, ``optimizer`` and ``metrics`` settings.
    """
    # Network architecture: flatten, two hidden ReLU layers, 10-unit output.
    hidden = nn.Flatten()(x)
    for width in (64, 32):
        hidden = nn.Dense(units=width, activation='relu')(hidden)
    logits = nn.Dense(units=10)(hidden)

    # Predictions used in prediction mode.
    predicted = nn.tf.argmax(logits, axis=1)

    # Learning-process configuration.
    return {
        'outputs': logits,
        'predictions': predicted,
        'loss': 'sparse_softmax_cross_entropy',
        'optimizer': ('GradientDescent', params['learning_rate']),
        'metrics': ['accuracy'],
    }
def main():
    """Train a PPO agent on CartPole-v0 and plot the returned scores."""
    environment = gym.make('CartPole-v0')
    action_count = environment.action_space.n
    hidden_units = 16

    # Policy head: softmax over the discrete actions.
    policy_layers = [
        nn.Dense(hidden_units, activation='relu'),
        nn.Dense(action_count, activation='softmax'),
    ]
    policy_net = nn.Sequential(policy_layers)

    # Critic head: a single scalar output.
    critic_layers = [
        nn.Dense(hidden_units, activation='relu'),
        nn.Dense(1),
    ]
    critic_net = nn.Sequential(critic_layers)

    learner = PPO(policy=policy_net, critic=critic_net, env=environment)
    history = learner.train(episodes=200)

    plt.plot(history)
    plt.show()
def __init__(self):
    """Create the layer stack: four Dense/BatchNorm/ReLU/Dropout groups
    of width 100 (the first taking 3072 inputs) and a final 100 -> 10
    Dense layer.
    """
    super().__init__()

    width = 100
    stack = []
    # layer1..layer4 share one shape; only the first input size differs.
    for fan_in in (3072, width, width, width):
        stack.append(nn.Dense(fan_in, width))
        stack.append(nn.BatchNorm(width))
        stack.append(nn.ReLU())
        stack.append(nn.Dropout(0.75))
    # layer5: output projection.
    stack.append(nn.Dense(width, 10))

    self.layers = stack
def __init__(self):
    """Create four Conv2D layers, an LSTM and a Dense output layer, and
    register all of them by name in ``self._parameters``.
    """
    super(Model, self).__init__()

    # Size shared by the LSTM arguments and the Dense layer's first argument.
    flat = 13 * 13

    self.c1 = nn.Conv2D((6, 6), 1, 10)
    self.c2 = nn.Conv2D((6, 6), 10, 15)
    self.c3 = nn.Conv2D((4, 4), 15, 20)
    self.c4 = nn.Conv2D((3, 3), 20, 25)
    self.lstm = nn.LSTM(flat, flat)
    self.dense = nn.Dense(flat, 10)

    # Name -> layer registry, in creation order.
    self._parameters = dict(
        conv1=self.c1,
        conv2=self.c2,
        conv3=self.c3,
        conv4=self.c4,
        lstm=self.lstm,
        dense=self.dense,
    )
def __init__(self):
    """Create three Dense layers with sizes 2 -> 8 -> 8 -> 1."""
    super(Model, self).__init__()

    n_in, n_hidden, n_out = 2, 8, 1
    self.layer_1 = nn.Dense(n_in, n_hidden)
    self.layer_2 = nn.Dense(n_hidden, n_hidden)
    self.layer_3 = nn.Dense(n_hidden, n_out)
import comm import matplotlib.pyplot as plt import numpy as np import gym import multiprocessing as mp import sys env = gym.make('CarRacing-v0') # plt.ion() # define network architecture x = i = nn.Input((2*96//8*96//8*3//3,)) x = nn.Dense(20)(x) x = nn.Dense(3)(x) net = nn.Model(i, x) del x, i # vectorized weights and original shape information outw, outs = nn.get_vectorized_weights(net) # run car racing problem def fitness_car_race(w, render: bool=False, steps=1000): score = 0 nn.set_vectorized_weights(net, w, outs) n = 2
return m def ask(self): cppns = self.neat.ask() self.gen = self.neat.gen return [self.create_network(cppn) for cppn in cppns] def tell(self, scores: list): self.neat.tell(scores) self.gen = self.neat.gen if __name__ == "__main__": i = x = nn.Input((2, )) x = nn.Dense(2, activation='sigmoid')(x) x = nn.Dense(1, activation='sigmoid')(x) x = nn.Model(i, x) m_cfg = x.get_config() del i, x pop = None fit = None attempts = 100 success = 0 gens = 0 for i in range(attempts): hn = HyperNeat( m_cfg, {
# Trains the bipedal walker problem using ES import es import nn import numpy as np import gym import multiprocessing as mp import sys env = gym.make('BipedalWalker-v3') # define network architecture x = i = nn.Input((24,)) x = nn.Dense(4)(x) net = nn.Model(i, x) del x, i # vectorized weights and original shape information outw, outs = nn.get_vectorized_weights(net) # run bipedal walker problem def fitness_walker(w, render: bool=False, steps=1000): score = 0 nn.set_vectorized_weights(net, w, outs) for _ in range(3): # env._max_episode_steps = steps obs = env.reset()
import nn import distrib import numpy as np import atari_py import gym import multiprocessing as mp import sys import time env = gym.make('Pong-ram-v4') # define network architecture x = i = nn.Input((128, )) x = nn.Dense(6)(x) net = nn.Model(i, x) del x, i # vectorized weights and original shape information outw, outs = nn.get_vectorized_weights(net) # run Pong def fitness_pong(w, render: bool = False, steps=1000): score = 0 nn.set_vectorized_weights(net, w, outs) for _ in range(1): # env._max_episode_steps = steps
# Trains the cart pole problem using ES import es import nn import numpy as np import gym import multiprocessing as mp import sys env = gym.make('CartPole-v1') # define network architecture x = i = nn.Input((4, )) x = nn.Dense(2)(x) net = nn.Model(i, x) del x, i # vectorized weights and original shape information outw, outs = nn.get_vectorized_weights(net) # run cart pole problem def fitness_cartpole(w: np.ndarray, render: bool = False, steps=1000): score = 0 nn.set_vectorized_weights(net, w, outs) n = 10 if render:
def make_agent(env, **kwargs):
    """Build an A3C agent with freshly created policy and critic networks.

    The A3C class is defined locally, two small ``nn.Sequential`` networks
    are created and called once on a sampled observation, and the agent is
    returned.

    Args:
        env: Environment exposing ``action_space.n`` and
            ``observation_space.sample()``.
        **kwargs: Forwarded to ``A3C``. Must include ``gradients_queue`` and
            ``parameters_queue``, which ``A3C.__init__`` requires and this
            function does not supply itself.

    Returns:
        The constructed ``A3C`` instance.
    """
    # ``nn`` is imported inside the function rather than at module level.
    # See https://stackoverflow.com/a/42506478
    import nn

    class A3C(Agent):
        """Actor-critic agent that exchanges gradients and weights via queues."""

        def __init__(self,
                     policy,
                     critic,
                     gradients_queue,
                     parameters_queue,
                     index=None,
                     t_max=5,
                     optimizer=None,
                     transitions=-1,
                     **kwargs):
            """Store the networks, optimizer and communication queues.

            Args:
                policy: Network called on states; its output is used as
                    action probabilities.
                critic: Network called on states; its output is used as the
                    state value.
                gradients_queue: Queue on which ``(index, grads)`` tuples are
                    put and from which they are read.
                parameters_queue: Indexable collection of queues carrying
                    ``(policy_weights, critic_weights)`` tuples.
                index: Position of this agent's own queue inside
                    ``parameters_queue``; also sent along with gradients.
                t_max: Number of stored transitions that triggers ``learn``.
                optimizer: Optimizer used by ``apply_gradients``; defaults to
                    ``nn.Adam()``.
                transitions: Forwarded to ``Agent.__init__``.
                **kwargs: Forwarded to ``Agent.__init__``.
            """
            super(A3C, self).__init__(transitions=transitions, **kwargs)
            self.policy = policy
            self.critic = critic
            self.optimizer = optimizer or nn.Adam()
            self.t_max = t_max
            self.gradients_queue = gradients_queue
            self.parameters_queue = parameters_queue
            self.index = index

        def act(self, state):
            """Sample an action index from the policy output for ``state``."""
            # state[None] adds a leading batch axis; [0] removes it again.
            probs = self.policy(state[None])[0].numpy()
            action = np.random.choice(len(probs), p=probs)
            return action

        def on_step_end(self):
            """Run ``learn`` once exactly ``t_max`` transitions are stored."""
            if len(self.transitions) == self.t_max:
                self.learn()

        def on_episode_end(self):
            """Run ``learn`` on whatever transitions are still stored."""
            if len(self.transitions) > 0:
                self.learn()

        def learn(self):
            """Compute gradients from the stored transitions and exchange them.

            Drains the transition buffer, builds the combined policy and
            critic loss, puts the gradients on ``gradients_queue`` and then
            blocks on this agent's parameters queue for new weights.
            """
            batch_size = len(self.transitions)
            data = self.transitions.get()
            self.transitions.reset()
            S, A, R, Snext, dones = data
            A = A.reshape([-1, 1])
            batch_shape = (batch_size, )
            # NOTE(review): self.gamma and self.compute_returns are not
            # defined in this class; presumably provided by Agent - confirm.
            gamma, policy, critic = self.gamma, self.policy, self.critic

            # If last state is not terminal then bootstrap from it
            if not dones[-1]:
                R[-1] += gamma * critic(
                    Snext[-1:])[0][0].numpy()  # handle batching

            G = self.compute_returns(R)
            # The critic output is detached, so deltas carry no gradient.
            deltas = G - critic(S).detach().flatten()
            U.check_shape(deltas, batch_shape)

            with nn.GradientTape() as tape:
                # Policy Objective
                probs = policy(S).gather(A, batch_dims=1).flatten()
                U.check_shape(probs, batch_shape)
                policy_objective = deltas * probs.log()
                U.check_shape(policy_objective, batch_shape)
                policy_objective = policy_objective.mean()
                U.check_shape(policy_objective, ())

                # Critic Loss
                V = critic(S).flatten()
                U.check_shape(V, batch_shape)
                critic_loss = (G - V).pow(2).mean()
                U.check_shape(critic_loss, ())

                # Total Loss
                # The objective is negated so that minimizing the loss
                # maximizes it.
                loss = -policy_objective + critic_loss

            grads = tape.gradient(loss, self.parameters)
            self.send_gradients(grads)
            self.receive_parameters()

        def send_gradients(self, grads):
            """Put ``(self.index, grads)`` on the gradients queue."""
            self.gradients_queue.put((self.index, grads))

        def receive_gradients(self):
            """Take one ``(index, grads)`` item off the queue and apply it.

            Gradients equal to ``None`` are not applied.

            Returns:
                The ``(index, grads)`` tuple that was read.
            """
            i, grads = self.gradients_queue.get()
            if grads is not None:
                self.apply_gradients(grads)
            return i, grads

        def apply_gradients(self, grads):
            """Apply ``grads`` to ``self.parameters`` with the optimizer."""
            self.optimizer.apply_gradients(zip(grads, self.parameters))

        def get_weights(self):
            """Return ``(policy_weights, critic_weights)``."""
            return self.policy.get_weights(), self.critic.get_weights()

        def set_weights(self, weights):
            """Load a ``(policy_weights, critic_weights)`` tuple."""
            policy_weights, critic_weights = weights
            self.policy.set_weights(policy_weights)
            self.critic.set_weights(critic_weights)

        def send_parameters(self, i=None):
            """Put the current weights on the parameter queue(s).

            Args:
                i: Index of the single queue to send to; when ``None`` the
                    weights are put on every queue in ``parameters_queue``.
            """
            params = self.get_weights()
            if i is None:
                queues = self.parameters_queue
            else:
                # One-element slice keeps the loop below uniform.
                queues = self.parameters_queue[i:i + 1]
            for q in queues:
                q.put(params)

        def receive_parameters(self):
            """Read weights from this agent's own queue and load them."""
            params = self.parameters_queue[self.index].get()
            self.set_weights(params)

        @property
        def parameters(self):
            """Trainable variables of policy and critic, cached on first use."""
            # NOTE(review): self._parameters is never assigned in this
            # class's __init__; presumably Agent initializes it to None -
            # confirm.
            if self._parameters is None:
                params = self.policy.trainable_variables + self.critic.trainable_variables
                params = U.unique(params)
                self._parameters = params
            return self._parameters

    n_actions = env.action_space.n
    hidden = 16
    policy = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(n_actions, activation='softmax'),
    ])
    critic = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(1),
    ])
    # initialize weights
    # Each network is called once on a sampled observation with a leading
    # batch axis added.
    state = env.observation_space.sample()
    policy(state[None])
    critic(state[None])
    agent = A3C(policy=policy, critic=critic, env=env, **kwargs)
    return agent
# Trains the XOR problem using ES import es import nn import numpy as np import multiprocessing as mp # define network architecture x = i = nn.Input((2, )) x = nn.Dense(2)(x) x = nn.Dense(1)(x) net = nn.Model(i, x) del x, i # vectorized weights and original shape information outw, outs = nn.get_vectorized_weights(net) # test XOR def fitness_xor(w: np.ndarray): total, p = 0, 2 nn.set_vectorized_weights(net, w, outs) out = net.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]])) total += np.power(0 - out[0, 0], p) total += np.power(1 - out[1, 0], p) total += np.power(1 - out[2, 0], p) total += np.power(0 - out[3, 0], p)