Example #1
import sys

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider

from mannequin.basicnet import Input, Affine, Tanh

def run():
    # Decoder: 2-D latent vector -> 300-unit tanh layer -> 28x28 image.
    decoder_input = Input(2)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Affine(decoder, 28 * 28)

    # Load trained weights from the file given on the command line.
    decoder.load_params(np.load(sys.argv[1]))

    def get_img(x):
        return decoder(x).reshape(28, 28)

    fig, ax = plt.subplots()
    plt.subplots_adjust(bottom=0.35)

    img = ax.imshow(get_img(np.zeros(decoder_input.n_outputs)),
                    cmap="gray",
                    vmin=-1,
                    vmax=1)

    # One horizontal slider per latent dimension, stacked near the bottom.
    slider_ax = [
        plt.axes([0.05, 0.05 * (i + 1), 0.9, 0.03])
        for i in range(decoder_input.n_outputs)
    ]
    sliders = [Slider(a, '', -3.0, 3.0, valinit=0.0) for a in slider_ax]

    def update(val):
        # Redraw the decoded image whenever any slider moves.
        x = [s.val for s in sliders]
        img.set_data(get_img(x))
        fig.canvas.draw_idle()

    for s in sliders:
        s.on_changed(update)

    plt.show()
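
The parameter file read from sys.argv[1] is just a flat vector saved with NumPy. A training script whose decoder matches the architecture above can produce one as sketched below (the file name is hypothetical; compare the np.save call in Example #2):

import numpy as np

# Assuming `decoder` is a trained network with the same shape as above
# (2-D latent, 300 tanh units, 28 * 28 outputs):
np.save("decoder_params.npy", decoder.get_params())
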
Example #2
import numpy as np
import matplotlib.pyplot as plt

from mannequin.basicnet import Input, Affine, Tanh
from mannequin.distrib import Gauss

# Module paths for Params, Const, DKLUninormal, Adam, and bar are
# assumptions; adjust them to match your version of mannequin.
from mannequin.basicnet import Params, Const
from mannequin.distrib import DKLUninormal
from mannequin import Adam, bar

def run():
    LATENT_SIZE = 3

    train_x = np.load("__mnist.npz")['train_x']

    # Encoder: flattened image -> 300-unit tanh layer -> diagonal Gaussian
    # over the latent space, with a learned input-independent log-std.
    encoder = Input(28 * 28)
    encoder = Tanh(Affine(encoder, 300))
    encoder = Gauss(mean=Affine(encoder, LATENT_SIZE),
                    logstd=Params(LATENT_SIZE))

    # KL divergence of the encoder's output distribution from N(0, I).
    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    # Decoder: latent vector -> 300-unit tanh layer -> Gaussian over pixels
    # with a fixed log-std of -3.
    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Gauss(mean=Affine(decoder, 28 * 28),
                    logstd=Const(np.zeros(28 * 28) - 3))

    # Separate Adam optimizers for the encoder and decoder parameters.
    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        idx = np.random.choice(len(train_x), size=128)
        pics = train_x[idx]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        # Sample one latent code per image; keep the backprop closure.
        representation, encBackprop = encoder.sample.evaluate(pics)

        # Log-likelihood of the inputs under the decoder's distribution.
        picsLogprob, decBackprop = decoder.logprob.evaluate(representation,
                                                            sample=pics)

        # KL divergence between the encoder's posterior and a unit Gaussian.
        dklValue, dklBackprop = dkl.evaluate(pics)

        # Ascend the reconstruction log-likelihood through the decoder...
        decOptimizer.apply_gradient(decBackprop(np.ones(128)))

        # ...and through the encoder, while descending the KL term.
        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128)) +
            encBackprop(decoder_input.last_gradient))

        print("Logprob:", bar(np.mean(picsLogprob), 20000), "DKL:",
              bar(np.mean(dklValue), 200))

        if i % 100 == 99:
            # Save a reconstruction next to its input image. Closing the
            # figure afterwards avoids leaking one figure per checkpoint.
            fig, plots = plt.subplots(2)
            changedPic = decoder.mean(representation[43])
            plots[0].imshow(changedPic.reshape(28, 28),
                            cmap="gray",
                            vmin=0,
                            vmax=1)
            plots[1].imshow(pics[43].reshape(28, 28),
                            cmap="gray",
                            vmin=0,
                            vmax=1)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)
            plt.close(fig)

        if i % 1000 == 999:
            np.save("step_%05d_decoder.npy" % (i + 1), decoder.get_params())
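
The two bars printed each step are the two halves of the variational lower bound. A minimal sketch of the combined objective, using the picsLogprob and dklValue arrays from the loop above:

# Monte-Carlo estimate of the ELBO that the two updates jointly ascend:
#   ELBO = E[log p(x|z)] - DKL(q(z|x) || N(0, I))
elbo = np.mean(picsLogprob) - np.mean(dklValue)
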
Example #3
import numpy as np
import matplotlib.pyplot as plt

from mannequin.basicnet import Input, Affine, Tanh
from mannequin.distrib import Gauss

# Module paths for Clip, DKLUninormal, and bar are assumptions;
# adjust them to match your version of mannequin.
from mannequin.basicnet import Clip
from mannequin.distrib import DKLUninormal
from mannequin import bar

def run():
    train_x = np.load("__mnist.npz")['train_x']

    encoder = Input(28, 28)
    encoder = Tanh(Affine(encoder, 256))
    encoder = Tanh(Affine(encoder, 256))
    encoder = Gauss(
        mean=Affine(encoder, 3, init=0.1),
        logstd=Clip(Affine(encoder, 3), -6.0, 0.0)
    )

    dkl = DKLUninormal(mean=encoder.mean, logstd=encoder.logstd)

    decoder = encoder.sample
    decoder = Tanh(Affine(decoder, 256))
    decoder = Tanh(Affine(decoder, 256))
    decoder = Gauss(
        mean=Affine(decoder, 28, 28, init=0.1),
        logstd=Clip(Affine(decoder, 28, 28), -6.0, 0.0)
    )

    # Hand-rolled SGD with momentum and elementwise gradient clipping.
    momentum = 0.0

    for i in range(10000):
        inps = train_x[np.random.choice(len(train_x), size=128)]

        # Reconstruction log-likelihood and its gradient (ascent direction).
        logprob, backprop = decoder.logprob.evaluate(inps, sample=inps)
        grad1 = backprop(np.ones(128))

        # KL term, descended by passing a negative upstream gradient.
        dkl_value, backprop = dkl.evaluate(inps)
        grad2 = backprop(-np.ones(128))

        # The encoder's parameters form a prefix of the decoder's parameter
        # vector (the decoder is built on top of encoder.sample), so the
        # DKL gradient can be added in place.
        grad1[:len(grad2)] += grad2
        momentum = momentum * 0.9 + grad1 * 0.1
        momentum = np.clip(momentum, -1.0, 1.0)
        decoder.load_params(decoder.get_params() + 0.001 * momentum)

        print(
            "Logprob:", bar(np.mean(logprob), 2000.0, length=20),
            "DKL:", bar(np.mean(dkl_value), 200.0, length=20),
        )

        if i % 100 == 99:
            fig, plots = plt.subplots(3, 2)
            for inp, pair in zip(inps, plots):
                for img, plot in zip([inp, decoder.mean(inp)], pair):
                    plot.imshow(img, cmap="gray")
            fig.set_size_inches(4, 6)
            fig.savefig("step_%05d.png" % (i + 1), dpi=100)
            plt.close(fig)
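
The hand-rolled update could also be driven by the Adam wrapper used in Examples #2 and #5. A rough sketch of the substitution (the module path for Adam is an assumption):

from mannequin import Adam  # path is an assumption

opt = Adam(decoder.get_params(), horizon=10, lr=0.01)

# Inside the training loop, after grad1[:len(grad2)] += grad2:
opt.apply_gradient(grad1)
decoder.load_params(opt.get_value())
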
Example #4
def build_policy(env, *, hid_layers=2, hid_size=64):
    from mannequin.basicnet import Input, Affine, Tanh

    # Observations in -> hidden tanh layers -> one output per action dim.
    policy = Input(env.observation_space.low.size)
    for _ in range(hid_layers):
        policy = Tanh(Affine(policy, hid_size))
    policy = Affine(policy, env.action_space.low.size)

    return policy
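
A hypothetical way to use this builder (the environment name is arbitrary; calling the network directly runs a forward pass, as in Example #1):

import gym

env = gym.make("Pendulum-v0")  # any Box-action environment
policy = build_policy(env, hid_layers=2, hid_size=64)

obs = env.reset()  # older Gym API: reset() returns the observation
action = policy(obs)
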
Example #5
from mannequin.basicnet import Input, Affine, Tanh

# The module path for Adam is an assumption; adjust to your mannequin version.
from mannequin import Adam

def SimplePredictor(in_size, out_size):
    model = Input(in_size)
    for _ in range(2):
        model = Tanh(Affine(model, 64))
    model = Affine(model, out_size, init=0.1)

    opt = Adam(model.get_params(), horizon=10, lr=0.01)

    def sgd_step(inps, lbls):
        # (lbls - outs) is the gradient of -0.5 * MSE with respect to the
        # outputs, so Adam ascends towards the labels.
        outs, backprop = model.evaluate(inps)
        opt.apply_gradient(backprop(lbls - outs))
        model.load_params(opt.get_value())

    model.sgd_step = sgd_step
    return model
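
A minimal sketch of training this predictor on a toy regression task (the data is made up for illustration):

import numpy as np

predictor = SimplePredictor(1, 1)

for _ in range(1000):
    xs = np.random.rand(64, 1) * 2.0 - 1.0
    predictor.sgd_step(xs, np.sin(3.0 * xs))  # fit y = sin(3x)

print(predictor([[0.5]]))  # should approach sin(1.5)
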
Example #6
import numpy as np

from mannequin.basicnet import Input, Affine, Tanh
from mannequin.distrib import Gauss

# Module paths for Params, DKLUninormal, Adam, and bar are assumptions;
# gen_traj() and save_plot() are helpers defined elsewhere in this script.
from mannequin.basicnet import Params
from mannequin.distrib import DKLUninormal
from mannequin import Adam, bar

def run():
    LATENT_SIZE = 2

    encoder = Input(gen_traj().size)
    encoder = Tanh(Affine(encoder, 300))
    # Build the mean network and a free log-std parameter vector as a pair,
    # so both can feed the DKL term and the Gauss head below.
    encoder = Affine(encoder, LATENT_SIZE), Params(LATENT_SIZE)

    dkl = DKLUninormal(mean=encoder[0], logstd=encoder[1])
    encoder = Gauss(mean=encoder[0], logstd=encoder[1])

    decoder_input = Input(LATENT_SIZE)
    decoder = Tanh(Affine(decoder_input, 300))
    decoder = Affine(decoder, gen_traj().size)
    mean_dec = decoder  # keep a handle on the mean network for plotting
    decoder = Gauss(mean=decoder)

    encOptimizer = Adam(encoder.get_params(), horizon=10, lr=0.01)
    decOptimizer = Adam(decoder.get_params(), horizon=10, lr=0.01)

    for i in range(10000):
        inps = [gen_traj() for i in range(128)]

        encoder.load_params(encOptimizer.get_value())
        decoder.load_params(decOptimizer.get_value())

        representation, encBackprop = encoder.sample.evaluate(inps)

        inpsLogprob, decBackprop = decoder.logprob.evaluate(representation,
                                                            sample=inps)

        dklValue, dklBackprop = dkl.evaluate(inps)

        decOptimizer.apply_gradient(decBackprop(np.ones(128)))

        encOptimizer.apply_gradient(
            dklBackprop(-np.ones(128)) +
            encBackprop(decoder_input.last_gradient))

        print("Logprob:", bar(np.mean(inpsLogprob), 20000), "DKL:",
              bar(np.mean(dklValue), 200))

        if i % 25 == 24:
            save_plot(
                "test_%05d.png" % (i + 1),
                mean_dec(np.random.randn(64, LATENT_SIZE)),
                inps,
                mean_dec(representation),
            )
Example #7
def stochastic_policy(env, *, hid_layers=2, hid_size=64):
    import gym
    from mannequin.basicnet import Input, Affine, Tanh
    from mannequin.distrib import Discrete, Gauss

    if isinstance(env.action_space, gym.spaces.Box):
        action_size = env.action_space.low.size
        Distribution = lambda p: Gauss(mean=p)
    elif isinstance(env.action_space, gym.spaces.Discrete):
        action_size = env.action_space.n
        Distribution = lambda p: Discrete(logits=p)
    else:
        raise ValueError("Unsupported action space")

    policy = Input(env.observation_space.low.size)
    for _ in range(hid_layers):
        policy = Tanh(Affine(policy, hid_size))
    policy = Affine(policy, action_size, init=0.1)
    policy = Distribution(policy)

    return policy
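
A hypothetical usage with a discrete-action environment; the .sample and .logprob attribute pattern is inferred from Examples #2 and #6:

import gym

env = gym.make("CartPole-v1")
policy = stochastic_policy(env)

obs = env.reset()  # older Gym API: reset() returns the observation
action = policy.sample(obs)  # draw one action from the policy distribution
logprob, backprop = policy.logprob.evaluate(obs, sample=action)
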