# Build the parameters of the probabilistic movement primitive (PMP) from a
# single random draw. `shape`, `timesteps`, `n_mean_par`, `n_bases` and
# `width` are defined outside this section.
# NOTE(review): NormalGauss is presumably a standard-normal distribution of
# the given shape -- confirm against its definition.
pmp_input = NormalGauss(shape)
sample = pmp_input.sample()
# Repeat the draw `timesteps` times along the first axis; the other two axes
# are left unrepeated (reps of 1).
sample = T.tile(sample, (timesteps, 1, 1), ndim=len(shape))

# Split the last axis: the first `n_mean_par` entries are used as the mean,
# the remaining entries are squared and used as `var`.
mean = sample[:, :, :n_mean_par]
var = sample[:, :, n_mean_par:]**2
# `uu` is simply another name for `mean`; both are handed to the PMP below.
uu = mean

# NOTE(review): `var` is already the square of the sample slice, so `var**2`
# passes the fourth power of the raw sample to the PMP. Confirm whether this
# double squaring is intended or whether one of the two squares is redundant.
pmp = ProbabilisticMovementPrimitive(n_bases,
                                     mean,
                                     var**2,
                                     uu,
                                     width=width,
                                     eps=1e-5)

# Diagonal Gaussian built from the symbolic inputs `dmean` and `dvar`
# (both defined outside this section). `dvar` is squared before use --
# presumably to keep the second parameter non-negative; TODO confirm.
diag = DiagGauss(dmean, dvar**2)

# KL divergence between `diag` and `pmp`, reduced in two steps: sum over
# axis 2, then average over everything that remains to obtain a scalar.
kl_coord_wise = kl_div(diag, pmp)
kl_sample_wise = kl_coord_wise.sum(axis=2)
kl = kl_sample_wise.mean()

# Symbolic gradients of the scalar `kl` with respect to `dmean` and `dvar`
# (one gradient expression per entry of the list).
grad_kl = T.grad(kl, [dmean, dvar])

# Compile callables that take concrete values for `dmean` and `dvar`:
# `foo_kl` evaluates `kl`, `foo_grad` evaluates the gradients in `grad_kl`.
foo_kl = theano.function([dmean, dvar], kl)
foo_grad = theano.function([dmean, dvar], grad_kl)


def evaluate(trials, repeats_per_trial, batch_size):
    kls = []
    grads = []