def test():
    """Train a small DARN-style model on the sklearn digits dataset, then
    draw samples from the learned prior and save a grid plot to ``out.png``.

    Architecture (all single-layer):
      - encoder  q(h|x): 64 visible units -> 100 hidden units
      - decoder  p(x|h): 100 hidden units -> 64 visible units
      - prior    p(h):   constant 1-wide input -> 100 hidden units

    NOTE(review): relies on project-local classes (``Layer``,
    ``LightweightModel``, ``Capsule``, ``BatchOptimizer``) whose contracts
    are not visible in this file; comments about them are best-effort.
    """
    seed = 10
    rng = rng_mrg.MRG_RandomStreams(seed=seed)

    # 8x8 digit images flattened -> 64 visible units; 100 latent units.
    x_dim = 64
    h_dim = 100

    # Encoder q(h|x).
    x_in = layers.InputLayer((None, x_dim))
    encoder_out = Layer(x_in, num_units=h_dim)
    model_encoder = LightweightModel([x_in], [encoder_out])

    # Decoder p(x|h).
    h_in = layers.InputLayer((None, h_dim))
    decoder_out = Layer(h_in, num_units=x_dim)
    model_decoder = LightweightModel([h_in], [decoder_out])

    # Prior p(h), driven by a constant zero input of width 1.
    prior_in = layers.InputLayer((None, 1))
    prior_out = Layer(prior_in, num_units=h_dim)
    model_prior = LightweightModel([prior_in], [prior_out])

    def loss_function(model, tensors):
        """Monte-Carlo estimate of the negative variational bound.

        Returns ``(scalar_loss, sampler_updates)``; the updates come from
        drawing h ~ q(h|X) and must be applied by the training function.
        """
        X = tensors["X"]
        # Sample h ~ q(h|X); u carries the RNG updates of the sampler.
        (h, u), = model_encoder.get_output(X, mode="sample", rng=rng)
        # Evaluate the three log-probability factors at the sampled h.
        (q_h_given_x, _), = model_encoder.get_output(X, mode="evaluate", on=h)
        (p_x_given_h, _), = model_decoder.get_output(h, mode="evaluate", on=X)
        ones = T.alloc(np.cast[theano.config.floatX](1.), *h.shape)
        zeros = T.alloc(np.cast[theano.config.floatX](0.), *h.shape)
        # Prior gets a zero column as input and is evaluated against ones.
        # NOTE(review): evaluating p(h) "on" all-ones rather than on the
        # sampled h looks suspicious for a variational bound — confirm
        # against LightweightModel.get_output's `on` semantics.
        (p_h, _), = model_prior.get_output(zeros[:, 0:1], mode="evaluate",
                                           on=ones)
        # L = -(log p(x|h) + log p(h) - log q(h|x)), summed over units.
        L = -((T.log(p_x_given_h).sum(axis=1) +
               T.log(p_h).sum(axis=1) -
               T.log(q_h_given_x).sum(axis=1)))
        return (L.mean()), u

    input_variables = OrderedDict(
        X=dict(tensor_type=T.matrix),
    )
    functions = dict()

    # --- sample function: h ~ p(h) for `nb` rows, then decode -------------
    nb = T.iscalar()
    sample_input = T.alloc(np.cast[theano.config.floatX](0.), nb, 1)
    from theano.updates import OrderedUpdates
    (s_h, u_h), = model_prior.get_output(sample_input, mode="sample", rng=rng)
    ones = T.alloc(np.cast[theano.config.floatX](1.), s_h.shape[0], x_dim)
    (s_x, u_x), = model_decoder.get_output(s_h, mode="evaluate", on=ones)
    # FIX: the original built an empty OrderedUpdates that was never used
    # and compiled `sample` with only u_x, silently dropping the prior's
    # sampling updates u_h — so the prior's RNG state was never advanced.
    # Merge both update sets into the compiled function.
    sampler_updates = OrderedUpdates()
    sampler_updates.update(u_h)
    sampler_updates.update(u_x)
    sample = theano.function([nb], s_x, updates=sampler_updates)

    batch_optimizer = BatchOptimizer(
        verbose=1,
        max_nb_epochs=100,
        batch_size=256,
        optimization_procedure=(updates.rmsprop, {"learning_rate": 0.0001}),
    )

    class Container(object):
        """Aggregate several models so Capsule sees one parameter list."""

        def __init__(self, models):
            self.models = models

        def get_all_params(self):
            # Flatten the per-model parameter lists into a single list.
            return [p for m in self.models for p in m.get_all_params()]

    # NOTE(review): model_prior's parameters are NOT in the container, so
    # the prior is never trained even though log p(h) is in the loss —
    # confirm this is intentional.
    models = Container([model_encoder, model_decoder])
    darn = Capsule(input_variables, models,
                   loss_function, functions=functions,
                   batch_optimizer=batch_optimizer)

    from sklearn.datasets import load_digits

    def build_digits():
        """Return (X, image_shape) with pixel values scaled into [0, 1]."""
        digits = load_digits()
        # load_digits pixel values are integers in [0, 16].
        X = digits.data.astype(np.float32) / 16.
        return X, digits.images.shape[1:]

    X, imshape = build_digits()
    darn.fit(X=X)

    # Draw 20 samples, reshape into square images, save the grid plot.
    s = sample(20)
    s = get_2d_square_image_view(s)
    grid_plot(s, imshow_options={"cmap": "gray"})
    plt.savefig("out.png")
# --- NADE training / sampling script fragment ----------------------------
# NOTE(review): this chunk references names (`model`, `loss_function`, `X`,
# `x_dim`, `order`) that are not defined here — presumably bound earlier in
# the file; verify before running standalone.

input_variables = OrderedDict(
    X=dict(tensor_type=T.matrix),
)
functions = dict(
    # Draw a sample from the model given X.
    sample=dict(
        get_output=lambda model, X: model.get_output(X, sampler=True)[0],
        params=["X"],
    ),
    # Per-example log-likelihood, summed over visible units.
    log_likelihood=dict(
        get_output=lambda model, X: T.log(model.get_output(X)[0]).sum(axis=1),
        params=["X"],
    ),
)
batch_optimizer = BatchOptimizer(
    verbose=1,
    max_nb_epochs=2,
    batch_size=256,
    optimization_procedure=(updates.rmsprop, {"learning_rate": 0.001}),
)
nade = Capsule(input_variables, model, loss_function,
               functions=functions, batch_optimizer=batch_optimizer)
nade.fit(X=X)

# FIX: the original rebound `T` (the theano.tensor alias) to a numpy array
# here, shadowing the module alias for the rest of the file; use a distinct
# name so the alias stays intact.
sample_seed = np.ones((100, x_dim)).astype(np.float32)
# Reorder columns to the model's visible-unit ordering.
sample_seed = sample_seed[:, order]
s = get_2d_square_image_view(nade.sample(sample_seed))
grid_plot(s, imshow_options={"cmap": "gray"})
plt.savefig("out.png")