# layer sizes: input, the hidden stack, and the output
hidden_dims = [32] * 32
# NOTE(review): hidden_dims[0] is listed explicitly AND is the head of
# hidden_dims, so dims carries an extra leading hidden layer — confirm
# this duplication is intentional
dims = [input_dim] + hidden_dims[:1] + hidden_dims + [n_outputs]

# allocate the network's parameters as shared variables
# nonlinearities: tanh on every hidden layer, logsoftmax on the output
fs = [activation.tanh] * len(dims[1:-1]) + [activation.logsoftmax]
# mean of each layer's input — one vector per fan-in
cs = [util.shared_floatx((fan_in,), initialization.constant(0))
      for fan_in in dims[:-1]]
# whitening matrix for each layer's input — one square matrix per fan-in
Us = [util.shared_floatx((fan_in, fan_in), initialization.identity())
      for fan_in in dims[:-1]]
# dense weight matrices, orthogonally initialized
Ws = [util.shared_floatx((fan_in, fan_out), initialization.orthogonal())
      for fan_in, fan_out in util.safezip(dims[:-1], dims[1:])]
# batch-normalization diagonal scales, one per output unit
gammas = [util.shared_floatx((fan_out,), initialization.constant(1))
          for fan_out in dims[1:]]
# biases (betas, in batch-norm terms), one per output unit
bs = [util.shared_floatx((fan_out,), initialization.constant(0))
      for fan_out in dims[1:]]

# reparametrization updates (presumably applied to the shared variables
# above by later code — the populating loop is not visible in this chunk)
updates = []
# theano graphs with assertions & breakpoints, to be evaluated only after
# performing the updates above
checks = []

# parameters grouped per layer — filled in by later code (TODO confirm;
# the construction is outside this chunk)
parameters_by_layer = []