Exemple #1
0
def test2():
    """Build a Reinforce-based agent on ``world``, score it and check timing.

    The agent is ``WholeTrajectories(RandomChoice(p=Reinforce(...)))`` wrapped
    around a ``Softmax`` whose logits come from a ``BasicNet``.  The score is
    written to stderr and a boolean sanity check (score >= 50.0) is printed.

    When the ``DEBUG`` environment variable is set, the agent is rendered
    five times instead of enforcing the time limit.
    """
    optimizer = GradAscent(n_steps=8, log_lr=0.9, init=Gauss)
    network = BasicNet(hidden_layers=[32], batch_size=16, params=optimizer)
    SupervisedAgent = Softmax(logits=network)
    RLAgent = WholeTrajectories(
        agent=RandomChoice(p=Reinforce(agent=SupervisedAgent)))

    # NOTE(review): 123 is presumably a random seed -- confirm against the
    # agent constructor.
    agent = RLAgent(world, 123)
    reward_per_episode = score(agent)
    sys.stderr.write(
        "Reward/episode (policy network): %.5f\n" % reward_per_episode)
    print("Policy network sanity check:", reward_per_episode >= 50.0)

    if "DEBUG" not in os.environ:
        # Normal run: only the time budget is enforced.
        assert timer() < 4.0
        return

    # Debug run: timer() is still called, but its result is ignored.
    timer()
    for _ in range(5):
        world.render(agent)
Exemple #2
0
        x = 0.0
        def reset():
            """Set the shared position ``x`` back to 0.0 and return it as ``[x]``."""
            nonlocal x
            x = 0.0
            return [x]
        def step(action):
            nonlocal x
            x += 1.0 if action[1] > action[0] else -1.0
            return [x], (1.0 if x > 3.5 else 0.0), abs(x) > 3.5, {}
        self.reset = reset
        self.step = step
        self.observation_space = gym.spaces.Box(-4.0, 4.0, (1,))
        self.action_space = gym.spaces.Box(0.0, 1.0, (2,))

env = FakeEnv()

# First episode: the policy always returns [1.0, 0.0].
# NOTE(review): traj.o / traj.a / traj.r are presumably observations,
# actions and rewards -- confirm against episode().
traj = episode(env, lambda i: [1.0, 0.0])
for obs, act, rew in zip(traj.o, traj.a, traj.r):
    print(obs, act, rew)

# Six single steps with the opposite policy ([0.0, 1.0]).
print()
for i in range(6):
    step_result = one_step(env, lambda i: [0.0, 1.0])
    print(*step_result)

# Second full episode, again with the [0.0, 1.0] policy.
print()
traj = episode(env, lambda i: [0.0, 1.0])
for obs, act, rew in zip(traj.o, traj.a, traj.r):
    print(obs, act, rew)

# The whole script must stay under the 0.02 limit reported by timer().
assert timer(print_info=False) < 0.02
Exemple #3
0
import sys
import numpy as np
import tensorflow as tf
# Run a trivial graph in a fresh session before `timer` is imported.
# NOTE(review): presumably this absorbs TensorFlow's one-time start-up cost
# so that it is not counted by the timing assertion below -- confirm.
tf.Session().run(tf.placeholder_with_default(0, shape=None))

from test_setup import timer
from worlds import *
from agents import *
from values import *
# Fail if timer() reports 0.05 or more at this point.
# NOTE(review): timer() comes from test_setup; presumably it returns elapsed
# seconds -- confirm its exact reference point there.
assert timer() < 0.05

def score(agent, n_episodes=1000):
    """Return the agent's accumulated mean reward divided by ``n_episodes``.

    The agent is run on ``Accuracy(Mnist(test=True))``.  For every step of
    every trajectory yielded by ``world.trajectories(agent, n_episodes)``,
    ``np.mean`` of that step's reward is added to a running total.
    """
    test_world = Accuracy(Mnist(test=True))

    # Flatten trajectories into a lazy stream of per-step rewards; only the
    # third element of each (o, a, r) step is used.
    step_rewards = (
        reward
        for trajectory in test_world.trajectories(agent, n_episodes)
        for _obs, _act, reward in trajectory
    )

    total = 0.0
    for reward in step_rewards:
        total += np.mean(reward)
    return total / n_episodes

def test1():
    """Score a ``RandomChoice`` agent and print two bounds checks on the result.

    The accuracy is written to stderr as a percentage; stdout gets two
    booleans: whether it is above 0.05 and whether it is below 0.15.
    """
    acc = score(RandomChoice(Mnist(), 123, p=0.1))
    sys.stderr.write("Random choice accuracy: %.1f%%\n" % (acc * 100.0))
    above_lower_bound = acc > 0.05
    below_upper_bound = acc < 0.15
    print("Random choice sanity check:", above_lower_bound, below_upper_bound)

def test2():
    SupervisedAgent = Softmax(
        logits=BasicNet(
            hidden_layers=[128],
            batch_size=128,
Exemple #4
0
import numpy as np
from mannequin.basicnet import *
from test_setup import timer

def sample_values(Model, *, dims):
    """Collect outputs of 1000 freshly built models on random normal inputs.

    Each round calls ``dims()`` for an ``(a, b)`` pair, builds
    ``Model(Input(a), b)``, evaluates it on ``np.random.randn(a)`` and keeps
    the flattened output.  Returns all outputs concatenated into one 1-D array.
    """
    def draw_once():
        # Order matters for the random stream: dims, model, then input.
        n_in, n_out = dims()
        net = Model(Input(n_in), n_out)
        outputs, _ = net.evaluate(np.random.randn(n_in))
        return outputs.reshape(-1)

    return np.concatenate([draw_once() for _ in range(1000)])

def check_std(Model, ci=(0.97, 1.03), **args):
    """Assert that the stddev of sampled ``Model`` outputs lies inside ``ci``.

    ``args`` are forwarded to ``sample_values``.  The measured standard
    deviation is written to stderr and must be strictly between ``ci[0]``
    and ``ci[1]``.
    """
    spread = np.std(sample_values(Model, **args))
    sys.stderr.write("Stddev: %.2f\n" % spread)
    assert ci[0] < spread < ci[1]

def rand_dims():
    """Return two random integers, each drawn from 5..20 inclusive."""
    return np.random.randint(16, size=2) + 5


# Checks that use the default interval of check_std, in their original order
# (the order determines how the global random stream is consumed).
default_ci_cases = [
    (Linear, lambda: (5, 20)),
    (Linear, lambda: (20, 5)),
    (Linear, rand_dims),
    (Affine, rand_dims),
    (Affine, lambda: (5, 20)),
]
for layer, dims_fn in default_ci_cases:
    check_std(layer, dims=dims_fn)

# With init=10.0 the output stddev is expected to fall between 9.7 and 10.3.
check_std(lambda *p: Linear(*p, init=10.0), dims=rand_dims, ci=(9.7, 10.3))

assert timer() < 1.0
Exemple #5
0
    model = Input(in_size)
    for _ in range(2):
        model = Tanh(Affine(model, 64))
    model = Affine(model, out_size, init=0.1)

    opt = Adam(model.get_params(), horizon=10, lr=0.01)

    def sgd_step(inps, lbls):
        """Apply one optimizer update to ``model`` from a batch.

        Evaluates the model on ``inps``, backpropagates ``lbls - outputs``,
        feeds the result to the optimizer and loads the optimizer's current
        value back into the model as its parameters.
        """
        predictions, backprop = model.evaluate(inps)
        residual = lbls - predictions
        opt.apply_gradient(backprop(residual))
        model.load_params(opt.get_value())

    model.sgd_step = sgd_step
    return model

# Fit a 1-in / 1-out predictor to sin(x) with 100 random batches of 128 points.
pred = SimplePredictor(1, 1)
for _ in range(100):
    batch = np.random.randn(128, 1) * 2.0
    pred.sgd_step(batch, np.sin(batch))

# A single input vector must give a single output vector.
single_output = pred([1.0])
assert single_output.shape == (1,)

# Evaluate on 101 evenly spaced points in [-5, 5], as a column.
x = np.linspace(-5.0, 5.0, 101).reshape(-1, 1)
y = pred(x)
assert y.shape == x.shape
abs_diff = np.abs(y - np.sin(x))
error = np.mean(abs_diff)
sys.stderr.write("Mean error: %.4f\n" % error)
assert error < 0.2

assert timer() < 0.3
Exemple #6
0
        outs, backprop = model.evaluate(inps)
        opt.apply_gradient(backprop(lbls - outs))
        model.load_params(opt.get_value())

    model.sgd_step = sgd_step
    return model


def func(x, y):
    """Return the two-element list ``[exp(x), x * y]``."""
    exponential = np.exp(x)
    product = x * y
    return [exponential, product]


# Fit a 2-in / 2-out predictor to func() with 500 random batches of 128 pairs.
pred = SimplePredictor(2, 2)
for _ in range(500):
    batch = np.random.randn(256).reshape(-1, 2)
    targets = [func(a, b) for a, b in batch]
    pred.sgd_step(batch, targets)

# A batch of two inputs must give a batch of two outputs.
assert pred(np.eye(2)).shape == (2, 2)

# Measure the absolute error on a 21 x 21 grid over [-2, 2] x [-2, 2].
grid = np.linspace(-2.0, 2.0, 21)
errors = []
for gx in grid:
    for gy in grid:
        guess = pred([gx, gy])
        assert guess.shape == (2, )
        errors.append(np.abs(guess - func(gx, gy)))
# Average per output component.
errors = np.mean(errors, axis=0)
sys.stderr.write("Mean errors: %.4f %.4f\n" % tuple(errors))
assert (errors < 0.2).all()

assert timer() < 2.0