hyperparameters = dict(
    # "eigh" on covariance matrix or "svd" on data matrix
    decomposition="svd",
    # whether to remix after whitening
    zca=True,
    # compute fisher based on supervised "loss" or model "output"
    objective="output",
    # eigenvalue bias
    eigenvalue_bias=1e-3,
    variance_bias=1e-8,
    batch_normalize=False,
    whiten_inputs=False,
    share_parameters=True)

datasets = mnist.get_data()

features = T.matrix("features")
targets = T.ivector("targets")

#theano.config.compute_test_value = "warn"
#features.tag.test_value = datasets["valid"]["features"]
#targets.tag.test_value = datasets["valid"]["targets"]

# compilation helpers
compile_memo = dict()
def compile(variables=(), updates=()):
    key = (util.tupelo(variables),
           tuple(OrderedDict(updates).items()))
    try:
import sys
from collections import OrderedDict

import numpy as np
import theano
import theano.tensor as T

import util, activation, initialization, steprules, whitening, mnist

learning_rate = 1e-3
# use batch normalization in addition to PRONG (i.e. PRONG+)
batch_normalize = False

data = mnist.get_data()

n_outputs = 10
dims = [784, 500, 300, 100, n_outputs]
layers = [
    dict(f=activation.tanh,
         c=util.shared_floatx((m,), initialization.constant(0)),     # input mean
         U=util.shared_floatx((m, m), initialization.identity()),    # input whitening matrix
         W=util.shared_floatx((m, n), initialization.orthogonal()),  # weight matrix
         g=util.shared_floatx((n,), initialization.constant(1)),     # gammas (for batch normalization)
         b=util.shared_floatx((n,), initialization.constant(0)))     # bias
    for m, n in util.safezip(dims[:-1], dims[1:])]
layers[-1]["f"] = activation.logsoftmax

features, targets = T.matrix("features"), T.ivector("targets")
#theano.config.compute_test_value = "warn"
#features.tag.test_value = data["valid"]["features"][:11]
#targets.tag.test_value = data["valid"]["targets"][:11]
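# A minimal sketch (an assumption, not part of the original script) of how a
# PRONG layer would consume the parameters above during the forward pass:
# center the input by c, whiten it by U, then apply the usual affine map
# W, b; g scales the pre-activation when batch normalization is enabled.
# The helper name `forward` is hypothetical.
def forward(x, layers):
    h = x
    for layer in layers:
        h = h - layer["c"]        # subtract stored input mean
        h = T.dot(h, layer["U"])  # whiten the centered input
        h = T.dot(h, layer["W"])  # ordinary weight matrix
        h = layer["g"] * h + layer["b"]
        h = layer["f"](h)         # tanh, logsoftmax on the last layer
    return h

# hypothetical usage: the last layer's logsoftmax yields log-probabilities,
# so the supervised loss would be the negative log-likelihood of the targets:
#   logprobs = forward(features, layers)
#   cross_entropy = -logprobs[T.arange(targets.shape[0]), targets].mean()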