Code example #1
def main():
    env = gym.make('CartPole-v0')
    n_actions = env.action_space.n
    hidden = 16
    model = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(n_actions),
    ])
    agent = DQN(model=model, double=True, env=env)
    scores = agent.train(episodes=300)
    plt.plot(scores)
    plt.show()
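This snippet (and the PPO variant in code example #3 below) shows only the body of main(). A minimal sketch of the scaffolding it assumes: gym and matplotlib are standard, while nn and DQN (and PPO in example #3) come from the surrounding marella/train project, whose commented import path below is an assumption rather than something shown in the snippet.

import gym
import matplotlib.pyplot as plt
# from train import nn, DQN, PPO  # assumed import path; check the project for the real one

if __name__ == '__main__':
    main()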
Code example #2
def model(x, params):
    # Define the network architecture
    x = nn.Flatten()(x)
    x = nn.Dense(units=64, activation='relu')(x)
    x = nn.Dense(units=32, activation='relu')(x)
    outputs = nn.Dense(units=10)(x)
    # Compute predictions for prediction mode
    predictions = nn.tf.argmax(outputs, axis=1)

    # Configure the learning process
    return dict(outputs=outputs,
                predictions=predictions,
                loss='sparse_softmax_cross_entropy',
                optimizer=('GradientDescent', params['learning_rate']),
                metrics=['accuracy'])
Code example #3
File: PPO_nn.py  Project: marella/train
def main():
    env = gym.make('CartPole-v0')
    n_actions = env.action_space.n
    hidden = 16
    policy = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(n_actions, activation='softmax'),
    ])
    critic = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(1),
    ])
    agent = PPO(policy=policy, critic=critic, env=env)
    scores = agent.train(episodes=200)
    plt.plot(scores)
    plt.show()
Code example #4
    def __init__(self):
        super().__init__()
        self.layers = [
            nn.Dense(3072, 100),  # layer1
            nn.BatchNorm(100),
            nn.ReLU(),
            nn.Dropout(0.75),
            nn.Dense(100, 100),  # layer2
            nn.BatchNorm(100),
            nn.ReLU(),
            nn.Dropout(0.75),
            nn.Dense(100, 100),  # layer3
            nn.BatchNorm(100),
            nn.ReLU(),
            nn.Dropout(0.75),
            nn.Dense(100, 100),  # layer4
            nn.BatchNorm(100),
            nn.ReLU(),
            nn.Dropout(0.75),
            nn.Dense(100, 10),  # layer5
        ]
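The 3072 inputs of layer1 match a flattened 32x32x3 image (CIFAR-10-sized), and layer5 maps to 10 classes. The base class presumably applies these layers in order; a minimal forward-pass sketch under that assumption (the method name and the callability of each layer are assumptions, not taken from the snippet):

    def forward(self, x):
        # Apply each registered layer in sequence; assumes every entry in
        # self.layers is callable and the framework adds no extra steps.
        for layer in self.layers:
            x = layer(x)
        return x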
Code example #5
    def __init__(self):
        super(Model, self).__init__()

        # Assuming stride-1, no-padding convolutions and a 28x28 single-channel
        # input (MNIST-sized, presumably), the spatial size shrinks
        # 28 -> 23 -> 18 -> 15 -> 13, which matches the 13 * 13 sizes below.
        self.c1 = nn.Conv2D((6, 6), 1, 10)
        self.c2 = nn.Conv2D((6, 6), 10, 15)
        self.c3 = nn.Conv2D((4, 4), 15, 20)
        self.c4 = nn.Conv2D((3, 3), 20, 25)

        self.lstm = nn.LSTM(13 * 13, 13 * 13)

        self.dense = nn.Dense(13 * 13, 10)

        self._parameters = {
            'conv1': self.c1,
            'conv2': self.c2,
            'conv3': self.c3,
            'conv4': self.c4,
            'lstm': self.lstm,
            'dense': self.dense,
        }
Code example #6
File: xor.py  Project: kraglik/surface
    def __init__(self):
        super(Model, self).__init__()

        self.layer_1 = nn.Dense(2, 8)
        self.layer_2 = nn.Dense(8, 8)
        self.layer_3 = nn.Dense(8, 1)
Code example #7
File: carracing_es.py  Project: nhamil/evolution
import comm
import nn  # needed for nn.Input / nn.Dense / nn.Model below

import matplotlib.pyplot as plt 
import numpy as np 
import gym 

import multiprocessing as mp 
import sys 

env = gym.make('CarRacing-v0') 

# plt.ion() 

# define network architecture 
x = i = nn.Input((2*96//8*96//8*3//3,))  # evaluates to 288; presumably two 96x96 frames downsampled by 8 and collapsed to one channel
x = nn.Dense(20)(x) 
x = nn.Dense(3)(x) 
net = nn.Model(i, x) 
del x, i 

# vectorized weights and original shape information 
outw, outs = nn.get_vectorized_weights(net) 

# run car racing problem 
def fitness_car_race(w, render: bool=False, steps=1000): 
    score = 0

    nn.set_vectorized_weights(net, w, outs) 

    n = 2
Code example #8
File: hyperneat.py  Project: nhamil/evolution
        return m

    def ask(self):
        cppns = self.neat.ask()
        self.gen = self.neat.gen
        return [self.create_network(cppn) for cppn in cppns]

    def tell(self, scores: list):
        self.neat.tell(scores)
        self.gen = self.neat.gen


if __name__ == "__main__":
    i = x = nn.Input((2, ))
    x = nn.Dense(2, activation='sigmoid')(x)
    x = nn.Dense(1, activation='sigmoid')(x)
    x = nn.Model(i, x)
    m_cfg = x.get_config()
    del i, x

    pop = None
    fit = None

    attempts = 100
    success = 0
    gens = 0

    for i in range(attempts):
        hn = HyperNeat(
            m_cfg, {
Code example #9
File: bipedalwalker_es.py  Project: nhamil/evolution
# Trains the bipedal walker problem using ES 

import es 
import nn

import numpy as np 
import gym 

import multiprocessing as mp 
import sys 

env = gym.make('BipedalWalker-v3') 

# define network architecture 
x = i = nn.Input((24,)) 
x = nn.Dense(4)(x) 
net = nn.Model(i, x) 
del x, i 

# vectorized weights and original shape information 
outw, outs = nn.get_vectorized_weights(net) 

# run bipedal walker problem 
def fitness_walker(w, render: bool=False, steps=1000): 
    score = 0

    nn.set_vectorized_weights(net, w, outs) 

    for _ in range(3): 
        # env._max_episode_steps = steps
        obs = env.reset() 
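        # Hedged continuation sketch: the snippet is cut off after env.reset().
        # A typical ES rollout continues as below, using net.predict (which
        # appears in the XOR example from the same project) and the standard
        # gym API; clipping the raw outputs to BipedalWalker's [-1, 1] action
        # range is an assumption about the original code.
        for _ in range(steps):
            if render:
                env.render()
            action = np.clip(net.predict(obs[None])[0], -1.0, 1.0)
            obs, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
    return score / 3  # averaging over the 3 episodes is also an assumption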
Code example #10
import nn
import distrib

import numpy as np
import atari_py
import gym

import multiprocessing as mp
import sys
import time

env = gym.make('Pong-ram-v4')

# define network architecture
x = i = nn.Input((128, ))
x = nn.Dense(6)(x)
net = nn.Model(i, x)
del x, i

# vectorized weights and original shape information
outw, outs = nn.get_vectorized_weights(net)


# run Pong
def fitness_pong(w, render: bool = False, steps=1000):
    score = 0

    nn.set_vectorized_weights(net, w, outs)

    for _ in range(1):
        # env._max_episode_steps = steps
Code example #11
# Trains the cart pole problem using ES

import es
import nn

import numpy as np
import gym

import multiprocessing as mp
import sys

env = gym.make('CartPole-v1')

# define network architecture
x = i = nn.Input((4, ))
x = nn.Dense(2)(x)
net = nn.Model(i, x)
del x, i

# vectorized weights and original shape information
outw, outs = nn.get_vectorized_weights(net)


# run cart pole problem
def fitness_cartpole(w: np.ndarray, render: bool = False, steps=1000):
    score = 0

    nn.set_vectorized_weights(net, w, outs)

    n = 10
    if render:
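        # Hedged continuation sketch: the snippet is cut off inside this branch.
        # Presumably rendering runs a single episode; everything below is an
        # assumption based on the sibling ES examples, using net.predict and the
        # standard gym API, with the discrete action chosen by argmax.
        n = 1
    for _ in range(n):
        obs = env.reset()
        for _ in range(steps):
            if render:
                env.render()
            action = int(np.argmax(net.predict(obs[None])[0]))
            obs, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
    return score / n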
Code example #12
def make_agent(env, **kwargs):
    # See https://stackoverflow.com/a/42506478
    import nn

    class A3C(Agent):
        def __init__(self,
                     policy,
                     critic,
                     gradients_queue,
                     parameters_queue,
                     index=None,
                     t_max=5,
                     optimizer=None,
                     transitions=-1,
                     **kwargs):
            super(A3C, self).__init__(transitions=transitions, **kwargs)
            self.policy = policy
            self.critic = critic
            self.optimizer = optimizer or nn.Adam()
            self.t_max = t_max
            self.gradients_queue = gradients_queue
            self.parameters_queue = parameters_queue
            self.index = index
            self._parameters = None  # cache for the `parameters` property below

        def act(self, state):
            probs = self.policy(state[None])[0].numpy()
            action = np.random.choice(len(probs), p=probs)
            return action

        def on_step_end(self):
            if len(self.transitions) == self.t_max:
                self.learn()

        def on_episode_end(self):
            if len(self.transitions) > 0:
                self.learn()

        def learn(self):
            batch_size = len(self.transitions)
            data = self.transitions.get()
            self.transitions.reset()
            S, A, R, Snext, dones = data
            A = A.reshape([-1, 1])
            batch_shape = (batch_size, )
            gamma, policy, critic = self.gamma, self.policy, self.critic
            # If last state is not terminal then bootstrap from it
            if not dones[-1]:
                R[-1] += gamma * critic(
                    Snext[-1:])[0][0].numpy()  # handle batching
            G = self.compute_returns(R)
            deltas = G - critic(S).detach().flatten()
            U.check_shape(deltas, batch_shape)
            with nn.GradientTape() as tape:
                # Policy Objective
                probs = policy(S).gather(A, batch_dims=1).flatten()
                U.check_shape(probs, batch_shape)
                policy_objective = deltas * probs.log()
                U.check_shape(policy_objective, batch_shape)
                policy_objective = policy_objective.mean()
                U.check_shape(policy_objective, ())
                # Critic Loss
                V = critic(S).flatten()
                U.check_shape(V, batch_shape)
                critic_loss = (G - V).pow(2).mean()
                U.check_shape(critic_loss, ())
                # Total Loss
                loss = -policy_objective + critic_loss
            grads = tape.gradient(loss, self.parameters)
            self.send_gradients(grads)
            self.receive_parameters()

        def send_gradients(self, grads):
            self.gradients_queue.put((self.index, grads))

        def receive_gradients(self):
            i, grads = self.gradients_queue.get()
            if grads is not None:
                self.apply_gradients(grads)
            return i, grads

        def apply_gradients(self, grads):
            self.optimizer.apply_gradients(zip(grads, self.parameters))

        def get_weights(self):
            return self.policy.get_weights(), self.critic.get_weights()

        def set_weights(self, weights):
            policy_weights, critic_weights = weights
            self.policy.set_weights(policy_weights)
            self.critic.set_weights(critic_weights)

        def send_parameters(self, i=None):
            params = self.get_weights()
            if i is None:
                queues = self.parameters_queue
            else:
                queues = self.parameters_queue[i:i + 1]
            for q in queues:
                q.put(params)

        def receive_parameters(self):
            params = self.parameters_queue[self.index].get()
            self.set_weights(params)

        @property
        def parameters(self):
            if self._parameters is None:
                params = self.policy.trainable_variables + self.critic.trainable_variables
                params = U.unique(params)
                self._parameters = params
            return self._parameters

    n_actions = env.action_space.n
    hidden = 16
    policy = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(n_actions, activation='softmax'),
    ])
    critic = nn.Sequential([
        nn.Dense(hidden, activation='relu'),
        nn.Dense(1),
    ])
    # initialize weights
    state = env.observation_space.sample()
    policy(state[None])
    critic(state[None])
    agent = A3C(policy=policy, critic=critic, env=env, **kwargs)
    return agent
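A hedged sketch of how make_agent might be wired up for a single worker process. It assumes parameters_queue is a list with one queue per worker (receive_parameters indexes into it) and that gradients_queue is shared; none of this wiring appears in the snippet itself.

import multiprocessing as mp
import gym

gradients_queue = mp.Queue()       # workers push (index, grads) onto this
parameters_queues = [mp.Queue()]   # one parameter queue per worker
agent = make_agent(gym.make('CartPole-v0'),
                   gradients_queue=gradients_queue,
                   parameters_queue=parameters_queues,
                   index=0)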
Code example #13
# Trains the XOR problem using ES

import es
import nn

import numpy as np

import multiprocessing as mp

# define network architecture
x = i = nn.Input((2, ))
x = nn.Dense(2)(x)
x = nn.Dense(1)(x)
net = nn.Model(i, x)
del x, i

# vectorized weights and original shape information
outw, outs = nn.get_vectorized_weights(net)


# test XOR
def fitness_xor(w: np.ndarray):
    total, p = 0, 2

    nn.set_vectorized_weights(net, w, outs)
    out = net.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))

    total += np.power(0 - out[0, 0], p)
    total += np.power(1 - out[1, 0], p)
    total += np.power(1 - out[2, 0], p)
    total += np.power(0 - out[3, 0], p)
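    # Hedged continuation sketch: the snippet is cut off here. ES maximizes
    # fitness, so the usual convention is to return the negated accumulated
    # squared error; whether the original negates, inverts, or rescales the
    # total is an assumption.
    return -total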