def test2():
    """Build the policy-network agent, score it, and report the result.

    A Softmax over a BasicNet (optimised by GradAscent) is wrapped in
    Reinforce, RandomChoice and WholeTrajectories, instantiated on the
    module-level `world` with the argument 123, and passed to `score`.
    The score is written to stderr, and stdout receives whether it
    reached 50.0.

    When the DEBUG environment variable is set, `timer()` is called and
    its result discarded, then the agent is rendered five times.
    Otherwise the value returned by `timer()` must be below 4.0.
    """
    # NOTE(review): the source line was collapsed; the DEBUG/timer branch is
    # kept inside this function because it uses the local `agent`.
    optimizer = GradAscent(n_steps=8, log_lr=0.9, init=Gauss)
    network = BasicNet(hidden_layers=[32], batch_size=16, params=optimizer)
    supervised_agent = Softmax(logits=network)

    reinforce = Reinforce(agent=supervised_agent)
    rl_agent = WholeTrajectories(agent=RandomChoice(p=reinforce))

    agent = rl_agent(world, 123)
    reward = score(agent)
    sys.stderr.write("Reward/episode (policy network): %.5f\n" % reward)
    print("Policy network sanity check:", reward >= 50.0)

    # Normal run: only the time limit remains to be checked.
    if "DEBUG" not in os.environ:
        assert timer() < 4.0
        return

    # Debug run: call the timer without checking it, then show the agent.
    timer()
    for _ in range(5):
        world.render(agent)
# NOTE(review): this chunk opens inside a constructor whose `class` / `def`
# headers are not visible here (the script below instantiates `FakeEnv()`, so
# presumably `FakeEnv.__init__` -- confirm). The indentation of the first
# statements assumes that two-level nesting.
        # Position of the fake environment, shared by the closures below.
        x = 0.0

        def reset():
            # Put the position back to 0.0 and return it as a 1-element list.
            nonlocal x
            x = 0.0
            return [x]

        def step(action):
            # Move +1.0 when action[1] > action[0], otherwise -1.0.
            nonlocal x
            x += 1.0 if action[1] > action[0] else -1.0
            # 4-tuple: [x], 1.0 only when x > 3.5, whether |x| > 3.5, and an
            # empty dict (shaped like a gym step result -- presumably
            # observation, reward, done, info).
            return [x], (1.0 if x > 3.5 else 0.0), abs(x) > 3.5, {}

        self.reset = reset
        self.step = step
        self.observation_space = gym.spaces.Box(-4.0, 4.0, (1,))
        self.action_space = gym.spaces.Box(0.0, 1.0, (2,))

env = FakeEnv()

# Episode with the constant action [1.0, 0.0] (action[1] > action[0] is
# False, so every step subtracts 1.0 from x); print each (o, a, r) triple.
traj = episode(env, lambda i: [1.0, 0.0])
for e in zip(traj.o, traj.a, traj.r):
    print(*e)
print()

# Six single steps with the opposite constant action [0.0, 1.0].
for i in range(6):
    print(*one_step(env, lambda i: [0.0, 1.0]))
print()

# Another episode, this time with the action [0.0, 1.0].
traj = episode(env, lambda i: [0.0, 1.0])
for e in zip(traj.o, traj.a, traj.r):
    print(*e)

# The value returned by timer() must stay below 0.02 (presumably elapsed
# seconds -- confirm against test_setup).
assert timer(print_info=False) < 0.02
import sys import numpy as np import tensorflow as tf tf.Session().run(tf.placeholder_with_default(0, shape=None)) from test_setup import timer from worlds import * from agents import * from values import * assert timer() < 0.05 def score(agent, n_episodes=1000): world = Accuracy(Mnist(test=True)) rew_sum = 0.0 for t in world.trajectories(agent, n_episodes): for o, a, r in t: rew_sum += np.mean(r) return rew_sum / n_episodes def test1(): agent = RandomChoice(Mnist(), 123, p=0.1) acc = score(agent) sys.stderr.write("Random choice accuracy: %.1f%%\n" % (acc * 100.0)) print("Random choice sanity check:", acc > 0.05, acc < 0.15) def test2(): SupervisedAgent = Softmax( logits=BasicNet( hidden_layers=[128], batch_size=128,
import numpy as np
from mannequin.basicnet import *
from test_setup import timer


def sample_values(Model, *, dims):
    """Collect outputs of 1000 freshly built models on random inputs.

    For each sample, `dims()` supplies an (input size, output size)
    pair, `Model(Input(a), b)` is constructed, and it is evaluated on
    one `np.random.randn(a)` vector. All outputs are flattened and
    returned as a single 1-D array.
    """
    def one_sample():
        n_in, n_out = dims()
        net = Model(Input(n_in), n_out)
        point = np.random.randn(n_in)
        output, _ = net.evaluate(point)
        return output.reshape(-1)

    return np.concatenate([one_sample() for _ in range(1000)])


def check_std(Model, ci=(0.97, 1.03), **args):
    """Assert that the std of sampled model outputs lies strictly inside `ci`.

    Extra keyword arguments are forwarded to `sample_values`. The
    measured standard deviation is also written to stderr.
    """
    spread = np.std(sample_values(Model, **args))
    sys.stderr.write("Stddev: %.2f\n" % spread)
    assert ci[0] < spread < ci[1]


def rand_dims():
    """Return two random sizes, each drawn from 5..20 inclusive."""
    return np.random.randint(16, size=2) + 5


# The order of these calls is kept: each one consumes random numbers.
check_std(Linear, dims=lambda: (5, 20))
check_std(Linear, dims=lambda: (20, 5))
check_std(Linear, dims=rand_dims)
check_std(Affine, dims=rand_dims)
check_std(Affine, dims=lambda: (5, 20))
check_std(
    lambda *p: Linear(*p, init=10.0),
    dims=rand_dims,
    ci=(9.7, 10.3),
)

assert timer() < 1.0
# NOTE(review): this chunk opens inside a function body whose `def` header is
# not visible here (the script below calls `SimplePredictor(1, 1)`, so
# presumably `def SimplePredictor(in_size, out_size):` -- confirm). The
# indentation of the first statements assumes that single level of nesting.
    # Two Tanh(Affine(..., 64)) layers on top of the input, followed by an
    # Affine output layer of size out_size built with init=0.1.
    model = Input(in_size)
    for _ in range(2):
        model = Tanh(Affine(model, 64))
    model = Affine(model, out_size, init=0.1)

    opt = Adam(model.get_params(), horizon=10, lr=0.01)

    def sgd_step(inps, lbls):
        # One update: evaluate the model, back-propagate (labels - outputs),
        # hand the result to the optimizer, and load its new value back
        # into the model.
        outs, backprop = model.evaluate(inps)
        opt.apply_gradient(backprop(lbls - outs))
        model.load_params(opt.get_value())

    # Expose the update step as an attribute of the returned model.
    model.sgd_step = sgd_step
    return model

pred = SimplePredictor(1, 1)

# 100 updates on batches of 128 inputs drawn as randn * 2.0, with sin(x) as
# the labels.
for _ in range(100):
    x = np.random.randn(128, 1) * 2.0
    pred.sgd_step(x, np.sin(x))

# A single 1-element input must produce a 1-element output.
assert pred([1.0]).shape == (1,)

# Evaluate on 101 evenly spaced points in [-5, 5]; output shape must match.
x = np.linspace(-5.0, 5.0, 101).reshape(-1, 1)
y = pred(x)
assert y.shape == x.shape

# Mean absolute difference from sin(x) must be below 0.2.
error = np.mean(np.abs(y - np.sin(x)))
sys.stderr.write("Mean error: %.4f\n" % error)
assert error < 0.2

# The value returned by timer() must stay below 0.3 (presumably elapsed
# seconds -- confirm against test_setup).
assert timer() < 0.3
# NOTE(review): this chunk opens inside the body of a nested `sgd_step`
# function; neither its `def` line nor the enclosing function's header is
# visible here (presumably `SimplePredictor(in_size, out_size)` -- confirm).
# The indentation of the first statements assumes that two-level nesting.
        # Evaluate the model, back-propagate (labels - outputs), hand the
        # result to the optimizer, and load its new value into the model.
        outs, backprop = model.evaluate(inps)
        opt.apply_gradient(backprop(lbls - outs))
        model.load_params(opt.get_value())

    # Expose the update step as an attribute of the returned model.
    model.sgd_step = sgd_step
    return model

def func(x, y):
    """Return the two regression targets `[exp(x), x * y]`."""
    return [np.exp(x), x * y]

pred = SimplePredictor(2, 2)

# 500 updates on batches of 128 (x, y) pairs drawn from randn, labelled by
# func.
for _ in range(500):
    xy = np.random.randn(256).reshape(-1, 2)
    pred.sgd_step(xy, [func(x, y) for x, y in xy])

# A batch of two 2-element inputs must give a (2, 2) output.
assert pred(np.eye(2)).shape == (2, 2)

# Absolute error on a 21 x 21 grid over [-2, 2] x [-2, 2].
errors = []
for x in np.linspace(-2.0, 2.0, 21):
    for y in np.linspace(-2.0, 2.0, 21):
        p = pred([x, y])
        assert p.shape == (2, )
        errors.append(np.abs(p - func(x, y)))
# Average over the grid, keeping one mean error per output component.
errors = np.mean(errors, axis=0)
sys.stderr.write("Mean errors: %.4f %.4f\n" % tuple(errors))
assert (errors < 0.2).all()

# The value returned by timer() must stay below 2.0 (presumably elapsed
# seconds -- confirm against test_setup).
assert timer() < 2.0