def run():
    LATENT_SIZE = 3
    train_x = np.load("__mnist.npz")['train_x']

    # Encoder: 28*28 pixels -> 300 tanh units -> diagonal Gaussian over the latent code.
    encoder = Input(28 * 28)
    encoder = Tanh(Affine(encoder, 300))
    encoder = Gauss(mean=Affine(encoder, LATENT_SIZE),
                    logstd=Params(LATENT_SIZE))
    # KL divergence between the encoder's Gaussian and a unit normal prior.
    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    # Decoder: latent code -> 300 tanh units -> Gaussian over pixels with a fixed log-std of -3.
    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Gauss(mean=Affine(decoder, 28 * 28),
                    logstd=Const(np.zeros(28 * 28) - 3))

    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        idx = np.random.choice(len(train_x), size=128)
        pics = train_x[idx]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        # Sample a latent code per image, score the reconstruction, and measure the KL term.
        representation, encBackprop = encoder.sample.evaluate(pics)
        picsLogprob, decBackprop = decoder.logprob.evaluate(representation, sample=pics)
        dklValue, dklBackprop = dkl.evaluate(pics)

        # Ascend the reconstruction log-probability; descend the KL term.
        # The reconstruction gradient reaches the encoder via decoder_input.last_gradient.
        decOptimizer.apply_gradient(decBackprop(np.ones(128)))
        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128))
            + encBackprop(decoder_input.last_gradient))

        print("Logprob:", bar(np.mean(picsLogprob), 20000),
              "DKL:", bar(np.mean(dklValue), 200))

        # Every 100 steps, save a reconstruction next to its original.
        if i % 100 == 99:
            plt.clf()
            fig, plots = plt.subplots(2)
            changedPic = decoder.mean(representation[43])
            plots[0].imshow(changedPic.reshape(28, 28), cmap="gray", vmin=0, vmax=1)
            plots[1].imshow(pics[43].reshape(28, 28), cmap="gray", vmin=0, vmax=1)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)

        # Every 1000 steps, checkpoint the decoder parameters.
        if i % 1000 == 999:
            np.save("step_%05d_decoder.npy" % (i + 1), decoder.get_params())
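For orientation, the two gradient calls in the loop correspond to the two terms of the objective this script stochastically ascends; assuming the standard VAE formulation, it is the evidence lower bound

$$\mathcal{L}(x) \;=\; \mathbb{E}_{z \sim q_\phi(z \mid x)}\big[\log p_\theta(x \mid z)\big] \;-\; D_{\mathrm{KL}}\big(q_\phi(z \mid x)\,\big\|\,\mathcal{N}(0, I)\big),$$

so the decoder's gradient is weighted by +1 (raise the reconstruction log-probability), while the encoder receives the DKL gradient weighted by -1 plus the reconstruction gradient routed back through `decoder_input.last_gradient`.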
def run():
    train_x = np.load("__mnist.npz")['train_x']

    # Encoder: two 256-unit tanh layers -> Gaussian over a 3-dimensional latent code,
    # with the log-std clipped to [-6, 0].
    encoder = Input(28, 28)
    encoder = Tanh(Affine(encoder, 256))
    encoder = Tanh(Affine(encoder, 256))
    encoder = Gauss(
        mean=Affine(encoder, 3, init=0.1),
        logstd=Clip(Affine(encoder, 3), -6.0, 0.0)
    )
    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    # Decoder: built directly on top of the encoder's sampled code,
    # so one parameter vector covers both networks.
    decoder = encoder.sample
    decoder = Tanh(Affine(decoder, 256))
    decoder = Tanh(Affine(decoder, 256))
    decoder = Gauss(
        mean=Affine(decoder, 28, 28, init=0.1),
        logstd=Clip(Affine(decoder, 28, 28), -6.0, 0.0)
    )

    momentum = 0.0
    for i in range(10000):
        inps = train_x[np.random.choice(len(train_x), size=128)]

        # Reconstruction gradient (ascend logprob) plus KL gradient (descend DKL).
        logprob, backprop = decoder.logprob.evaluate(inps, sample=inps)
        grad1 = backprop(np.ones(128))
        dkl_value, backprop = dkl.evaluate(inps)
        grad2 = backprop(-np.ones(128))
        # Add the KL gradient onto the leading (encoder) slice of the combined gradient.
        grad1[:len(grad2)] += grad2

        # Clipped-momentum gradient ascent with a fixed step size.
        momentum = momentum * 0.9 + grad1 * 0.1
        momentum = np.clip(momentum, -1.0, 1.0)
        decoder.load_params(decoder.get_params() + 0.001 * momentum)

        print(
            "Logprob:", bar(np.mean(logprob), 2000.0, length=20),
            "DKL:", bar(np.mean(dkl_value), 200.0, length=20),
        )

        # Every 100 steps, plot three inputs next to their reconstructions.
        if i % 100 == 99:
            fig, plots = plt.subplots(3, 2)
            for inp, pair in zip(inps, plots):
                for img, plot in zip([inp, decoder.mean(inp)], pair):
                    plot.imshow(img, cmap="gray")
            fig.set_size_inches(4, 6)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)
            plt.close(fig)
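This version swaps Adam for a hand-rolled update. Reading it off the code, each step keeps an exponential moving average of the combined gradient, clips it element-wise, and takes a fixed-size step:

$$m_t = 0.9\,m_{t-1} + 0.1\,g_t, \qquad \theta_{t+1} = \theta_t + 0.001\,\operatorname{clip}(m_t,\,-1,\,1).$$

Because the decoder is stacked directly on `encoder.sample`, `decoder.get_params()` presumably spans both networks with the encoder's parameters first, which is why the KL gradient is added onto the slice `grad1[:len(grad2)]`.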
def run():
    LATENT_SIZE = 2

    # Encoder: trajectory -> 300 tanh units; keep the mean head and the
    # log-std parameters as a (mean, logstd) pair.
    encoder = Input(gen_traj().size)
    encoder = Tanh(Affine(encoder, 300))
    encoder = Affine(encoder, LATENT_SIZE), Params(LATENT_SIZE)
    dkl = DKLUninormal(mean=encoder[0], logstd=encoder[1])
    encoder = Gauss(mean=encoder[0], logstd=encoder[1])

    # Decoder: latent code -> 300 tanh units -> Gaussian over the trajectory,
    # parameterised by its mean only.
    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Affine(decoder, gen_traj().size)
    mean_dec = decoder
    decoder = Gauss(mean=decoder)

    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        inps = [gen_traj() for _ in range(128)]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        representation, encBackprop = encoder.sample.evaluate(inps)
        inpsLogprob, decBackprop = decoder.logprob.evaluate(representation, sample=inps)
        dklValue, dklBackprop = dkl.evaluate(inps)

        decOptimizer.apply_gradient(decBackprop(np.ones(128)))
        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128))
            + encBackprop(decoder_input.last_gradient))

        # Debug: the gradient flowing back into the decoder's input.
        print(decoder_input.last_gradient)
        print("Logprob:", bar(np.mean(inpsLogprob), 20000),
              "DKL:", bar(np.mean(dklValue), 200))

        # Every 25 steps, plot decoded random codes, the real trajectories,
        # and their reconstructions.
        if i % 25 == 24:
            save_plot(
                "test_%05d.png" % (i + 1),
                mean_dec(np.random.randn(64, LATENT_SIZE)),
                inps,
                mean_dec(representation),
            )
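One note on the decoder head: `Gauss(mean=decoder)` is built without a `logstd` term, so, assuming the framework then fixes the standard deviation at some constant sigma, maximizing the trajectory log-probability is, up to an additive constant, the same as minimizing a scaled squared reconstruction error:

$$\log p(x \mid z) \;=\; -\frac{1}{2\sigma^2}\,\lVert x - \mu_\theta(z)\rVert^2 \;+\; \text{const}.$$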