# Network layer sizes: input, hidden and output dimensions.
hidden_dims = [32] * 32
# NOTE(review): dims includes hidden_dims[0] twice (once explicitly, once via
# the hidden_dims list), giving len(hidden_dims) + 1 hidden layers in total —
# confirm this duplication is intentional.
dims = [input_dim, hidden_dims[0]] + hidden_dims + [n_outputs]

# Per-layer activation functions: tanh on every hidden layer,
# log-softmax on the output layer.
fs = [activation.tanh for _ in dims[1:-1]] + [activation.logsoftmax]

# Layer input means, one vector per layer input.
cs = [util.shared_floatx((m,), initialization.constant(0))
      for m in dims[:-1]]
# Layer input whitening matrices, initialized to identity.
Us = [util.shared_floatx((m, m), initialization.identity())
      for m in dims[:-1]]
# Weight matrices mapping each layer input to the next layer,
# initialized orthogonally.
Ws = [util.shared_floatx((m, n), initialization.orthogonal())
      for m, n in util.safezip(dims[:-1], dims[1:])]
# Batch-normalization diagonal scales.
gammas = [util.shared_floatx((n,), initialization.constant(1))
          for n in dims[1:]]
# Biases or betas, initialized to zero.
bs = [util.shared_floatx((n,), initialization.constant(0))
      for n in dims[1:]]

# Reparametrization updates, collected while the graph is built.
updates = []
# Theano graphs with assertions & breakpoints, to be evaluated after
# performing the updates.
checks = []
parameters_by_layer = []