Example #1
0
def test(args):
    """Train a random network on one environment and print whether it solved it.

    Args:
        args: A 4-tuple ``(env_id, want_reward, generations, layers)`` packed
            into a single argument.
            ``env_id`` is passed to ``gym.make``.
            ``want_reward`` is the minimum average reward counted as solved.
            ``generations`` is forwarded to ``Net.train``.
            ``layers`` is an iterable of layer sizes placed between the
            input sizes (observation-space shape) and the output size
            (derived from the action space).

    Returns:
        None. The outcome is reported on stdout as ``START(...)`` followed
        by either ``FAILED(...)`` or ``SOLVED(...)``.
    """
    env_id, want_reward, generations, layers = args
    print(f'START({env_id})')

    with gym.make(env_id) as env:
        # Input side: one entry per observation-space dimension.
        layers = (*env.observation_space.shape, *layers)

        # Output side: a Box action space contributes its shape; any other
        # space is expected to expose the number of actions as ``.n``.
        action_space = env.action_space
        if isinstance(action_space, Box):
            layers = (*layers, *action_space.shape)
        else:
            layers = (*layers, action_space.n)

        net = Net.random(layers)
        net.train(env, generations, render=False, print_stats=False)

        # Average several evaluations before comparing against the target.
        # NOTE(review): presumably evaluate() returns one episode's total
        # reward and may vary between calls — confirm in Net.
        n = 10  # times to run evaluation before taking average
        reward = sum(net.evaluate(env) for _ in range(n)) / n

        if reward < want_reward:
            # Label the measured value as "got" and the threshold as "want";
            # the previous wording printed the measured reward after "want".
            print(f'FAILED({env_id}): got {reward} < want {want_reward}')
        else:
            print(f'SOLVED({env_id}) reward: {reward} >= {want_reward}')
Example #2
0
from main import Net


def heading(msg):
    """Print ``msg`` as a banner with 20 '=' characters on each side."""
    rule = '=' * 20
    # str.join, like the original concatenation, requires msg to be a str.
    print(' '.join((rule, msg, rule)))


# Simple handcrafted test of Net.from_params: overwrite the first and last
# entries of the parameter vector and check where they land in the rebuilt net.
heading('Test handcrafted')
net = Net.random((3, 2, 4, 6))
p = net.params()
# Sentinel values: the asserts below expect index 0 to end up at biases[0][0]
# and index -1 to end up at weights[-1][-1][-1].
p[-1] = 100
p[0] = 200

# Weight shapes before the rebuild, for comparison with the shapes printed
# after it.
print([w.shape for w in net.weights])
net = Net.from_params(p, net.layers)
print(f'Biases {[b.shape for b in net.biases]}\n', net.biases)
print(f'Weights {[w.shape for w in net.weights]}\n', net.weights)
assert net.biases[0][0] == 200
got = net.weights[-1][-1][-1]
assert got == 100, f'want 100 got {got}'

# ===========================
# Global test: add 20 to every parameter and rebuild the network from the
# shifted parameters (does not test structure!)

heading('Test add 20')
net = Net.random((3, 2, 4, 5))
p = net.params()
# NOTE(review): assumes params() returns an array-like for which `+ 20` is
# element-wise (e.g. a NumPy array) — confirm in main.Net.
p = p + 20

net = Net.from_params(p, net.layers)