def run():
    # Rebuild the decoder architecture and load trained parameters
    # from the file given on the command line.
    decoder_input = Input(2)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Affine(decoder, 28 * 28)
    decoder.load_params(np.load(sys.argv[1]))

    def get_img(x):
        return decoder(x).reshape(28, 28)

    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.35)
    img = ax.imshow(
        get_img(np.zeros(decoder_input.n_outputs)),
        cmap="gray", vmin=-1, vmax=1
    )

    # One slider per latent coordinate.
    slider_ax = [
        plt.axes([0.05, 0.05 * (i + 1), 0.9, 0.03])
        for i in range(decoder_input.n_outputs)
    ]
    sliders = [Slider(a, '', -3.0, 3.0, valinit=0.0) for a in slider_ax]

    def update(val):
        x = [s.val for s in sliders]
        img.set_data(get_img(x))
        fig.canvas.draw_idle()

    for s in sliders:
        s.on_changed(update)

    plt.show()
def run():
    LATENT_SIZE = 3
    train_x = np.load("__mnist.npz")['train_x']

    # Encoder: 784 -> 300 -> diagonal Gaussian over the 3-d latent space.
    encoder = Input(28 * 28)
    encoder = Tanh(Affine(encoder, 300))
    encoder = Gauss(
        mean=Affine(encoder, LATENT_SIZE),
        logstd=Params(LATENT_SIZE)
    )
    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    # Decoder: latent -> 300 -> Gaussian over pixels with a fixed log-std of -3.
    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Gauss(
        mean=Affine(decoder, 28 * 28),
        logstd=Const(np.zeros(28 * 28) - 3)
    )

    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        idx = np.random.choice(len(train_x), size=128)
        pics = train_x[idx]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        representation, encBackprop = encoder.sample.evaluate(pics)
        picsLogprob, decBackprop = decoder.logprob.evaluate(
            representation, sample=pics)
        dklValue, dklBackprop = dkl.evaluate(pics)

        # Ascend the reconstruction log-likelihood, descend the KL term, and
        # push the reconstruction gradient back through the sampled latents.
        decOptimizer.apply_gradient(decBackprop(np.ones(128)))
        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128))
            + encBackprop(decoder_input.last_gradient)
        )

        print("Logprob:", bar(np.mean(picsLogprob), 20000),
              "DKL:", bar(np.mean(dklValue), 200))

        if i % 100 == 99:
            fig, plots = plt.subplots(2)
            changedPic = decoder.mean(representation[43])
            plots[0].imshow(changedPic.reshape(28, 28),
                            cmap="gray", vmin=0, vmax=1)
            plots[1].imshow(pics[43].reshape(28, 28),
                            cmap="gray", vmin=0, vmax=1)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)
            plt.close(fig)  # close the figure instead of plt.clf() so figures don't accumulate

        if i % 1000 == 999:
            np.save("step_%05d_decoder.npy" % (i + 1), decoder.get_params())
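# Reminder of the objective the gradient calls above implement (standard VAE
# training; the formula below is not part of the original script). Each step
# ascends a Monte Carlo estimate of the ELBO,
#     L(x) = E_{z ~ q(z|x)}[log p(x|z)] - DKL(q(z|x) || N(0, I)),
# which is why the decoder receives the reconstruction-log-likelihood gradient
# (+1 per sample), the encoder receives the KL gradient with flipped sign
# (-1 per sample), and decoder_input.last_gradient carries the reconstruction
# gradient back through the reparameterized sample z.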
def run():
    train_x = np.load("__mnist.npz")['train_x']

    # Encoder: 28x28 images -> two tanh layers -> diagonal Gaussian over a
    # 3-d latent space (log-std clipped to [-6, 0] for stability).
    encoder = Input(28, 28)
    encoder = Tanh(Affine(encoder, 256))
    encoder = Tanh(Affine(encoder, 256))
    encoder = Gauss(
        mean=Affine(encoder, 3, init=0.1),
        logstd=Clip(Affine(encoder, 3), -6.0, 0.0)
    )
    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    # The decoder is built directly on the reparameterized sample, so the full
    # encoder + decoder chain shares one parameter vector.
    decoder = encoder.sample
    decoder = Tanh(Affine(decoder, 256))
    decoder = Tanh(Affine(decoder, 256))
    decoder = Gauss(
        mean=Affine(decoder, 28, 28, init=0.1),
        logstd=Clip(Affine(decoder, 28, 28), -6.0, 0.0)
    )

    momentum = 0.0
    for i in range(10000):
        inps = train_x[np.random.choice(len(train_x), size=128)]

        logprob, backprop = decoder.logprob.evaluate(inps, sample=inps)
        grad1 = backprop(np.ones(128))

        dkl_value, backprop = dkl.evaluate(inps)
        grad2 = backprop(-np.ones(128))

        # grad2 covers the encoder's parameters, which sit at the start of the
        # shared parameter vector, so the KL gradient is added to that slice.
        grad1[:len(grad2)] += grad2

        # Hand-rolled SGD with momentum, clipped element-wise to [-1, 1].
        momentum = momentum * 0.9 + grad1 * 0.1
        momentum = np.clip(momentum, -1.0, 1.0)
        decoder.load_params(decoder.get_params() + 0.001 * momentum)

        print(
            "Logprob:", bar(np.mean(logprob), 2000.0, length=20),
            "DKL:", bar(np.mean(dkl_value), 200.0, length=20),
        )

        if i % 100 == 99:
            # Save three (input, reconstruction) pairs side by side.
            fig, plots = plt.subplots(3, 2)
            for inp, pair in zip(inps, plots):
                for img, plot in zip([inp, decoder.mean(inp)], pair):
                    plot.imshow(img, cmap="gray")
            fig.set_size_inches(4, 6)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)
            plt.close(fig)
def build_policy(env, *, hid_layers=2, hid_size=64):
    from mannequin.basicnet import Input, Affine, Tanh

    policy = Input(env.observation_space.low.size)
    for _ in range(hid_layers):
        policy = Tanh(Affine(policy, hid_size))
    policy = Affine(policy, env.action_space.low.size)

    return policy
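# Usage sketch for build_policy (assumptions: a gym environment with a Box
# action space; "Pendulum-v0" is only illustrative, and the network is called
# directly, the same way decoder(x) is called in the viewer script above).
def example_build_policy():
    import gym
    env = gym.make("Pendulum-v0")
    policy = build_policy(env, hid_layers=2, hid_size=64)
    action = policy(env.reset())  # forward pass: observation -> action vector
    print(action.shape)           # -> (env.action_space.low.size,)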
def SimplePredictor(in_size, out_size):
    model = Input(in_size)
    for _ in range(2):
        model = Tanh(Affine(model, 64))
    model = Affine(model, out_size, init=0.1)

    opt = Adam(model.get_params(), horizon=10, lr=0.01)

    def sgd_step(inps, lbls):
        # One Adam step that moves the outputs toward the labels
        # (the gradient of a squared-error loss, up to a constant factor).
        outs, backprop = model.evaluate(inps)
        opt.apply_gradient(backprop(lbls - outs))
        model.load_params(opt.get_value())

    model.sgd_step = sgd_step
    return model
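# Usage sketch for SimplePredictor (hypothetical toy data; the sin(x) target
# and the number of steps are illustrative only).
def example_simple_predictor():
    predictor = SimplePredictor(1, 1)
    for _ in range(500):
        xs = np.random.uniform(-2.0, 2.0, size=(64, 1))
        predictor.sgd_step(xs, np.sin(xs))   # nudge outputs toward sin(x)
    print(predictor(np.array([0.5])))        # should approach sin(0.5) ~ 0.479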
def run():
    LATENT_SIZE = 2

    encoder = Input(gen_traj().size)
    encoder = Tanh(Affine(encoder, 300))
    # Keep the (mean, logstd) heads as a pair so both the KL term and the
    # sampling distribution can be built from them.
    encoder = Affine(encoder, LATENT_SIZE), Params(LATENT_SIZE)
    dkl = DKLUninormal(mean=encoder[0], logstd=encoder[1])
    encoder = Gauss(mean=encoder[0], logstd=encoder[1])

    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Affine(decoder, gen_traj().size)
    mean_dec = decoder
    decoder = Gauss(mean=decoder)

    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        inps = [gen_traj() for _ in range(128)]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        representation, encBackprop = encoder.sample.evaluate(inps)
        inpsLogprob, decBackprop = decoder.logprob.evaluate(
            representation, sample=inps)
        dklValue, dklBackprop = dkl.evaluate(inps)

        decOptimizer.apply_gradient(decBackprop(np.ones(128)))
        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128))
            + encBackprop(decoder_input.last_gradient)
        )

        # print(decoder_input.last_gradient)  # debug: gradient reaching the latent input
        print("Logprob:", bar(np.mean(inpsLogprob), 20000),
              "DKL:", bar(np.mean(dklValue), 200))

        if i % 25 == 24:
            save_plot(
                "test_%05d.png" % (i + 1),
                mean_dec(np.random.randn(64, LATENT_SIZE)),
                inps,
                mean_dec(representation),
            )
def stochastic_policy(env, *, hid_layers=2, hid_size=64):
    import gym
    from mannequin.basicnet import Input, Affine, Tanh
    from mannequin.distrib import Discrete, Gauss

    # Choose the output distribution based on the action space type.
    if isinstance(env.action_space, gym.spaces.Box):
        action_size = env.action_space.low.size
        Distribution = lambda p: Gauss(mean=p)
    elif isinstance(env.action_space, gym.spaces.Discrete):
        action_size = env.action_space.n
        Distribution = lambda p: Discrete(logits=p)
    else:
        raise ValueError("Unsupported action space")

    policy = Input(env.observation_space.low.size)
    for _ in range(hid_layers):
        policy = Tanh(Affine(policy, hid_size))
    policy = Affine(policy, action_size, init=0.1)
    policy = Distribution(policy)

    return policy
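# Usage sketch for stochastic_policy (assumptions: "CartPole-v1" is only an
# example, and the distribution's sample head is callable directly, in the
# same way encoder.sample is used in the VAE scripts above).
def example_stochastic_policy():
    import gym
    env = gym.make("CartPole-v1")
    policy = stochastic_policy(env)
    obs = env.reset()
    action = policy.sample(obs)                   # one action drawn from Discrete(logits)
    obs, reward, done, _ = env.step(int(action))  # cast in case sample returns a numpy scalar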