def __init__(self, anchors, K, D, alpha=1.0, beta=1.0, *args, **kwargs):
    """Set up the anchored Beta variable over a ``(K, D)`` array.

    Parameters
    ----------
    anchors : iterable
        Each item is indexed as ``item[0]`` (a row index) and ``item[1]``
        (an iterable of column indices).  For every listed column, only
        the entry in row ``item[0]`` keeps a mask value of 1; the rest of
        that column is set to 0.
    K, D : int
        Number of rows and columns of the mask.
    alpha, beta : float
        Stored as shared variables on the instance and forwarded (as
        plain values) to the ``anchored_betas`` transform.
    *args, **kwargs
        Passed through unchanged to the parent initialiser.
    """
    self.alpha = shared(alpha)
    self.beta = shared(beta)

    # The mask holds 1 for free entries and 0 for entries pinned at 1E-6.
    free_entries = np.ones((K, D))
    for anchor in anchors:
        for column in anchor[1]:
            free_entries[:, column] = 0
            free_entries[anchor[0], column] = 1
    self.mask = TT.as_tensor_variable(free_entries)

    transform = anchored_betas(mask=self.mask, K=K, D=D,
                               alpha=alpha, beta=beta)
    super(Beta_with_anchors, self).__init__(transform=transform,
                                            *args, **kwargs)

    # TODO: Should this be numpy like ratematrix
    # Masked-out entries get 1E-6; free entries get the Beta mean.
    pinned = TT.ones_like(self.mask) * 1E-6
    beta_mean = alpha / (alpha + beta)
    self.mean = TT.set_subtensor(pinned[self.mask.nonzero()], beta_mean)
def __init__(self, input=tensor.dvector('input'), target=tensor.dvector('target'), n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
    """Build a one-hidden-layer network and compile its functions.

    The instance exposes the symbolic ``input``/``target``, the shared
    parameters ``lr``, ``w1`` and ``w2`` (both weight matrices start at
    zero), the symbolic ``hidden``, ``output`` and squared-error
    ``cost``, and three compiled functions: ``sgd_step``,
    ``compute_output`` and ``output_from_hidden``.

    Remaining keyword arguments go to the parent initialiser.
    """
    super(NNet, self).__init__(**kw)

    self.input, self.target = input, target
    self.lr = shared(lr, 'learning_rate')
    self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
    self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')

    # Forward pass and sum-of-squares cost.
    self.hidden = sigmoid(tensor.dot(self.w1, self.input))
    self.output = tensor.dot(self.w2, self.hidden)
    residual = self.output - self.target
    self.cost = tensor.sum(residual ** 2)

    # Plain gradient-descent update for each weight matrix.
    self.sgd_updates = {}
    for weights in (self.w1, self.w2):
        gradient = tensor.grad(self.cost, weights)
        self.sgd_updates[weights] = weights - self.lr * gradient

    self.sgd_step = pfunc(
        params=[self.input, self.target],
        outputs=[self.output, self.cost],
        updates=self.sgd_updates)
    self.compute_output = pfunc([self.input], self.output)
    self.output_from_hidden = pfunc([self.hidden], self.output)
def __init__(self, hyperparameters):
    """Create randomly initialised embeddings, weights and zero biases.

    ``hyperparameters`` must provide ``vocab_size``, ``embedding_size``,
    ``input_size``, ``hidden_size`` and ``output_size``.  The embeddings
    are kept as a plain numpy array; weights and biases are shared
    variables.  All arrays use dtype ``floatX``.
    """
    self.hyperparameters = hyperparameters
    hp = self.hyperparameters

    # Re-seed numpy's global generator without a fixed seed.
    numpy.random.seed()

    uniform = numpy.random.rand(hp.vocab_size, hp.embedding_size)
    self.embeddings = numpy.asarray((uniform - 0.5) * 2 * 0.01,
                                    dtype=floatX)

    hidden_init = random_weights(hp.input_size, hp.hidden_size, scale_by=1)
    self.hidden_weights = shared(numpy.asarray(hidden_init, dtype=floatX))

    output_init = random_weights(hp.hidden_size, hp.output_size, scale_by=1)
    self.output_weights = shared(numpy.asarray(output_init, dtype=floatX))

    hidden_zeros = numpy.zeros((hp.hidden_size,))
    self.hidden_biases = shared(numpy.asarray(hidden_zeros, dtype=floatX))

    output_zeros = numpy.zeros((hp.output_size,))
    self.output_biases = shared(numpy.asarray(output_zeros, dtype=floatX))
def test_strict_generic(self):
    """Setting a value of another type must work for generic variables.

    A generic shared variable can hold anything, so ``set_value`` with
    an int on a variable created from a string should succeed whether
    or not ``strict`` is set.
    """
    variables = [shared('asdf', strict=flag) for flag in (False, True)]
    for variable in variables:
        variable.set_value(88)
def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10, n_train=100):
    """Train a small two-layer tanh network and time the training loop.

    Parameters
    ----------
    use_gpu : bool
        Selects ``tcn.shared_constructor`` (True) or ``shared`` (False)
        for the parameters, and is forwarded to ``get_mode``.
    n_batch, n_in, n_hid, n_out : int
        Batch size and layer sizes.
    n_train : int
        Number of training steps; forced to 1 when ``config.mode`` is
        ``'DEBUG_MODE'``.

    Returns
    -------
    (numpy.ndarray, float)
        The stacked outputs of every training call and the elapsed
        wall-clock time of the loop in seconds.
    """
    if config.mode == 'DEBUG_MODE':
        n_train = 1

    # Only the chosen constructor is looked up, so ``tcn`` is not
    # touched on the CPU path.
    make_shared = tcn.shared_constructor if use_gpu else shared

    w = make_shared(0.01 * (my_rand(n_in, n_hid) - 0.5), 'w')
    b = make_shared(my_zeros(n_hid), 'b')
    # This variable used to be labelled 'c', clashing with the bias below.
    v = make_shared(my_zeros((n_hid, n_out)), 'v')
    c = make_shared(my_zeros(n_out), 'c')

    x = tensor.fmatrix('x')
    y = tensor.fmatrix('y')
    lr = tensor.fscalar('lr')

    hid = tensor.tanh(tensor.dot(x, w) + b)
    out = tensor.tanh(tensor.dot(hid, v) + c)
    # The learning rate is folded into the loss, so the plain gradient
    # below is already scaled by it.
    loss = tensor.sum(0.5 * (out - y) ** 2 * lr)

    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)

    mode = get_mode(use_gpu)
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])

    xval = my_rand(n_batch, n_in)
    yval = my_rand(n_batch, n_out)
    # Separate name: ``lr`` stays bound to the symbolic input.
    lr_value = theano._asarray(0.01, dtype='float32')

    t0 = time.time()
    rval = [train(xval, yval, lr_value) for _ in xrange(n_train)]
    dt = time.time() - t0

    print_mode(mode)
    return numpy.asarray(rval), dt
def test_scalar_floatX(self):
    """Scalar shared variables keep the dtype of their initial value.

    floatX must not be used by the shared constructor for scalars:
    shared values can change, and since the range they might take is
    unknown, the bit width / precision of the value used to create the
    variable is kept.  Every successful case passes
    ``allow_downcast=True`` and then sets a value of a different numeric
    kind; the final case omits it and expects ``TypeError``.
    """
    def f(var, val):
        var.set_value(val)

    # (initial value, expected type, value assigned afterwards)
    cases = [
        (numpy.int64(7), theano.tensor.lscalar, 8.23),
        (numpy.int32(7), theano.tensor.iscalar, 8.23),
        (numpy.int16(7), theano.tensor.wscalar, 8.23),
        (numpy.int8(7), theano.tensor.bscalar, 8.23),
        (numpy.float64(7.234), theano.tensor.dscalar, 8),
        (numpy.float32(7.234), theano.tensor.fscalar, 8),
        # ``numpy.float`` was only an alias of the builtin and has been
        # removed from recent NumPy releases; use the builtin directly.
        (float(7.234), theano.tensor.dscalar, 8),
        (7.234, theano.tensor.dscalar, 8),
    ]
    for initial, expected_type, new_value in cases:
        b = shared(initial, allow_downcast=True)
        assert b.type == expected_type
        f(b, new_value)
        assert b.get_value() == 8

    # Without allow_downcast, float64 data is rejected by a float32
    # variable.
    b = shared(numpy.zeros((5, 5), dtype='float32'))
    self.assertRaises(TypeError, f, b, numpy.random.rand(5, 5))
def train(self, train_set_x, pretraining_epochs=15, pretrain_lr=0.001, batch_size=1, n_ins=784, hidden_layers_sizes=None):
    """Pre-train the stacked auto-encoder layer by layer.

    Parameters
    ----------
    train_set_x : TensorSharedVariable or array-like
        Training data; wrapped with ``shared`` when it is not already a
        ``TensorSharedVariable``.  Its first dimension is the number of
        samples.
    pretraining_epochs : int
        Number of passes over the training set for every layer.
    pretrain_lr : float
        Learning rate given to each pre-training function.
    batch_size : int
        Number of samples per mini-batch.
    n_ins : int
        Input dimensionality forwarded to the auto-encoder.
    hidden_layers_sizes : list of int, optional
        Sizes of the hidden layers; defaults to ``[500, 500]``.

    The model is stored in ``self.sda``; the mean cost of every
    (layer, epoch) pair is printed.
    """
    # A fresh list per call instead of one default shared by all calls.
    if hidden_layers_sizes is None:
        hidden_layers_sizes = [500, 500]

    if not isinstance(train_set_x, TensorSharedVariable):
        train_set_x = shared(train_set_x)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    # Floor division keeps the batch count an integer (as xrange needs)
    # even when true division is in effect.
    n_train_batches //= batch_size

    # Single-argument print calls behave the same on Python 2 and 3.
    print("hidden_layers_sizes:  %s" % (hidden_layers_sizes,))
    print("... building the model")

    numpy_rng = numpy.random.RandomState(89677)
    self.sda = AdvancedStackedAutoEncoder(
        numpy_rng=numpy_rng,
        n_ins=n_ins,
        hidden_layers_sizes=hidden_layers_sizes,
    )

    print("... getting the pretraining function")
    pretraining_fns = self.sda.pretraining_functions(
        train_set_x=train_set_x, batch_size=batch_size)

    print('... pre-training the model')
    for i in xrange(self.sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in xrange(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost  %s'
                  % (i, epoch, numpy.mean(c)))
def gen(self, op, *args, **kwargs):
    """Create a new random stream in this container.

    :param op: callable invoked as ``op(rng, *args, **kwargs)`` with a
        freshly created shared random state; it must return a pair
        ``(new_rng, draw)``
    :param args: interpreted by `op`
    :param kwargs: interpreted by `op`

    :returns: the second element of `op`'s return value.  The pair
        ``(rng, new_rng)`` is attached to it as ``update``, recorded in
        ``self.state_updates`` and installed as the default update of
        the shared random state.
    """
    stream_seed = int(self.gen_seedgen.randint(2 ** 30))
    rng_state = shared(numpy.random.RandomState(stream_seed))

    # Add a reference to distinguish from other shared variables
    rng_state.tag.is_rng = True

    next_state, draw = op(rng_state, *args, **kwargs)

    draw.rng = rng_state
    draw.update = (rng_state, next_state)
    self.state_updates.append(draw.update)
    rng_state.default_update = next_state
    return draw
def mv_shared(*args, **kwargs):
    '''Create a shared variable and register it in `mv_shared.shared_vars`.

    All arguments are forwarded to `shared`.  The new variable is
    wrapped in an `MVSharedVariable`, the wrapper is appended to
    `mv_shared.shared_vars`, and the unwrapped variable is returned.
    '''
    created = shared(*args, **kwargs)
    wrapper = MVSharedVariable(created)
    mv_shared.shared_vars.append(wrapper)
    return created
def test_tensor_floatX(self):
    """Vector shared variables keep their dtype when values are downcast."""
    def f(var, val):
        var.set_value(val)

    # (initial array, expected type, value assigned afterwards)
    typed_cases = (
        (numpy.int64([7]), theano.tensor.lvector, [8.23]),
        (numpy.int32([7]), theano.tensor.ivector, [8.23]),
        (numpy.int16([7]), theano.tensor.wvector, [8.23]),
        (numpy.int8([7]), theano.tensor.bvector, [8.23]),
        (numpy.float64([7.234]), theano.tensor.dvector, [8]),
        (numpy.float32([7.234]), theano.tensor.fvector, [8]),
    )
    for initial, expected_type, assigned in typed_cases:
        b = shared(initial, allow_downcast=True)
        assert b.type == expected_type
        f(b, assigned)
        assert b.get_value() == 8

    # Not covered, per the original author's notes:
    # - numpy.float([7.234]) does not work;
    # - shared([7.234]) generates a generic type (no cast is applied).

    floatx_array = numpy.asarray([7.234], dtype=theano.config.floatX)
    b = shared(floatx_array, allow_downcast=True)
    assert b.dtype == theano.config.floatX
    f(b, [8])
    assert b.get_value() == 8

    # No allow_downcast here: float64 data must be rejected.
    b = shared(numpy.zeros((5, 5), dtype='float32'))
    self.assertRaises(TypeError, f, b, numpy.random.rand(5, 5))
def test_ctors(self):
    """Check the types that ``shared`` infers from its initial value."""
    # The type of a Python int depends on the platform's int bit width.
    if theano.configdefaults.python_int_bitwidth() == 32:
        expected_int_type = theano.tensor.iscalar
    else:
        expected_int_type = theano.tensor.lscalar
    int_type = shared(7).type
    assert int_type == expected_int_type, int_type

    assert shared(7.0).type == theano.tensor.dscalar
    assert shared(numpy.float32(7)).type == theano.tensor.fscalar

    # test tensor constructor
    b = shared(numpy.zeros((5, 5), dtype='int32'))
    assert b.type == TensorType('int32', broadcastable=[False, False])

    b = shared(numpy.random.rand(4, 5))
    assert b.type == TensorType('float64', broadcastable=[False, False])

    b = shared(numpy.random.rand(5, 1, 2))
    assert b.type == TensorType('float64',
                                broadcastable=[False, False, False])

    assert shared([]).type == generic

    # An unknown keyword argument must be rejected.
    self.assertRaises(TypeError, shared, 7, bad_kw=False)
def __init__(self, window_size, vocab_size, embedding_size, hidden_size, seed, initial_embeddings, two_hidden_layers):
    """
    Initialize L{Model} parameters.

    The embeddings are either drawn uniformly at random (after seeding
    numpy's global generator with ``seed``) or copied from
    ``initial_embeddings``, whose shape must be
    ``(vocab_size, HYPERPARAMETERS["EMBEDDING_SIZE"])``.  Depending on
    the module-level ``LBL`` flag, either the output weights/biases and
    score biases, or the hidden (optionally second hidden) and output
    layers are created as shared variables of dtype ``floatX``.
    """
    self.vocab_size = vocab_size
    self.window_size = window_size
    self.embedding_size = embedding_size
    self.two_hidden_layers = two_hidden_layers
    self.hidden_size = hidden_size
    self.output_size = self.embedding_size if LBL else 1

    import numpy
    import hyperparameters
    from pylearn.algorithms.weights import random_weights

    def as_shared(values):
        # Wrap an array-like as a floatX shared variable.
        return shared(numpy.asarray(values, dtype=floatX))

    def initial_weights(n_rows, n_cols):
        # Randomly initialised, scaled weight matrix as a shared variable.
        scale = HYPERPARAMETERS["SCALE_INITIAL_WEIGHTS_BY"]
        return as_shared(random_weights(n_rows, n_cols, scale_by=scale))

    numpy.random.seed(seed)

    embeddings_shape = (self.vocab_size, HYPERPARAMETERS["EMBEDDING_SIZE"])
    if initial_embeddings is not None:
        assert initial_embeddings.shape == embeddings_shape
        self.embeddings = copy.copy(initial_embeddings)
    else:
        centred = numpy.random.rand(*embeddings_shape) - 0.5
        self.embeddings = numpy.asarray(
            centred * 2 * HYPERPARAMETERS["INITIAL_EMBEDDING_RANGE"],
            dtype=floatX)

    if HYPERPARAMETERS["NORMALIZE_EMBEDDINGS"]:
        self.normalize(range(self.vocab_size))

    if LBL:
        self.output_weights = initial_weights(self.input_size,
                                              self.output_size)
        self.output_biases = as_shared(numpy.zeros((1, self.output_size)))
        self.score_biases = as_shared(numpy.zeros(self.vocab_size))
        assert not self.two_hidden_layers
    else:
        self.hidden_weights = initial_weights(self.input_size,
                                              self.hidden_size)
        self.hidden_biases = as_shared(numpy.zeros((self.hidden_size,)))
        if self.two_hidden_layers:
            self.hidden2_weights = initial_weights(self.hidden_size,
                                                   self.hidden_size)
            self.hidden2_biases = as_shared(
                numpy.zeros((self.hidden_size,)))
        self.output_weights = initial_weights(self.hidden_size,
                                              self.output_size)
        self.output_biases = as_shared(numpy.zeros((self.output_size,)))
def test_scalar_strict(self):
    """Strict scalar shared variables reject values of another kind.

    Each variable is created with ``strict=True``; assigning a float to
    an integer variable, or an int to a float variable, must raise
    ``TypeError``.
    """
    def f(var, val):
        var.set_value(val)

    # (initial value, expected type, value that must be rejected)
    cases = [
        (numpy.int64(7), theano.tensor.lscalar, 8.23),
        (numpy.int32(7), theano.tensor.iscalar, 8.23),
        (numpy.int16(7), theano.tensor.wscalar, 8.23),
        (numpy.int8(7), theano.tensor.bscalar, 8.23),
        (numpy.float64(7.234), theano.tensor.dscalar, 8),
        (numpy.float32(7.234), theano.tensor.fscalar, 8),
        # ``numpy.float`` was only an alias of the builtin and has been
        # removed from recent NumPy releases; use the builtin directly.
        (float(7.234), theano.tensor.dscalar, 8),
        (7.234, theano.tensor.dscalar, 8),
    ]
    for initial, expected_type, rejected in cases:
        b = shared(initial, strict=True)
        assert b.type == expected_type
        self.assertRaises(TypeError, f, b, rejected)

    # Even without strict, float64 data is rejected by a float32
    # variable.
    b = shared(numpy.zeros((5, 5), dtype='float32'))
    self.assertRaises(TypeError, f, b, numpy.random.rand(5, 5))
def test_SdA():
    """ test AdvancedStackedAutoEncoder """
    # This is how the SdA is exercised: obtaining features only takes
    # two calls, train() and get_features().
    samples = [
        [1., 1., 1.],
        [2., 2., 2.],
    ]
    train_set_x = shared(numpy.asarray(samples))

    # test_set = [4.] * (28*28)
    test_set = [4.] * 3

    driver = StackedAutoEncoderDriver()
    driver.train(train_set_x, n_ins=3, hidden_layers_sizes=[2, 1])
    # driver.train_with_mnist()

    # Read the trained parameters (the value itself is not used here).
    params = driver.sda.params
    features = driver.get_features(test_set)
    print(features)
def test_tensor_strict(self):
    """Strict vector shared variables reject values of another kind."""
    def f(var, val):
        var.set_value(val)

    # (initial array, expected type, value that must be rejected)
    strict_cases = (
        (numpy.int64([7]), theano.tensor.lvector, 8.23),
        (numpy.int32([7]), theano.tensor.ivector, 8.23),
        (numpy.int16([7]), theano.tensor.wvector, 8.23),
        (numpy.int8([7]), theano.tensor.bvector, 8.23),
        (numpy.float64([7.234]), theano.tensor.dvector, 8),
        (numpy.float32([7.234]), theano.tensor.fvector, 8),
    )
    for initial, expected_type, rejected in strict_cases:
        b = shared(initial, strict=True)
        assert b.type == expected_type
        self.assertRaises(TypeError, f, b, rejected)

    # Not covered, per the original author's notes:
    # - numpy.float([7.234]) does not work;
    # - shared([7.234], strict=True) generates a generic type (no cast
    #   is applied).

    b = shared(numpy.zeros((5, 5), dtype='float32'))
    self.assertRaises(TypeError, f, b, numpy.random.rand(5, 5))
def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
    """Update the mappings `W_i` of every subject, one subject at a time.

    For each subject a Theano cost is built (alignment term, plus the
    classification term when the subject has supervised data) and
    minimised over the Stiefel manifold with a conjugate-gradient
    solver, starting from the current mapping.

    Parameters
    ----------
    data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
        Each element in the list contains the fMRI data for alignment of
        one subject. There are n_align samples for each subject.

    data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject for
        the classification task. An element may be None, in which case
        only the alignment term is used for that subject.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    s : array, shape=[features, samples]
        The shared response.

    theta : array, shape=[classes, features]
        The MLR class plane parameters.

    bias : array, shape=[classes]
        The MLR class biases.

    Returns
    -------
    w : list of 2D array, element i has shape=[voxels_i, features]
        The updated orthogonal transforms (mappings). The input list is
        modified in place and returned.
    """
    subjects = len(data_align)

    s_th = S.shared(s.astype(theano.config.floatX))
    theta_th = S.shared(theta.T.astype(theano.config.floatX))
    bias_th = S.shared(bias.T.astype(theano.config.floatX),
                       broadcastable=(True, False))

    for subject in range(subjects):
        logger.info('Subject Wi %d' % subject)
        # Solve for subject i
        # Create the theano function
        w_th = T.matrix(name='W', dtype=theano.config.floatX)
        data_srm_subject = \
            S.shared(data_align[subject].astype(theano.config.floatX))
        constf1 = \
            S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                     allow_downcast=True)
        f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

        if data_sup[subject] is not None:
            lr_samples_S = S.shared(data_sup[subject].shape[1])
            data_sup_subject = \
                S.shared(data_sup[subject].astype(theano.config.floatX))
            labels_S = S.shared(labels[subject])
            constf2 = S.shared(-self.alpha / self.gamma
                               / data_sup[subject].shape[1],
                               allow_downcast=True)

            log_p_y_given_x = T.log(T.nnet.softmax((theta_th.dot(
                w_th.T.dot(data_sup_subject))).T + bias_th))
            f2 = constf2 * T.sum(
                log_p_y_given_x[T.arange(lr_samples_S), labels_S])
            f = f1 + f2
        else:
            f = f1

        # Define the problem and solve
        f_subject = self._objective_function_subject(data_align[subject],
                                                     data_sup[subject],
                                                     labels[subject],
                                                     w[subject],
                                                     s, theta, bias)
        # Smaller of the magnitude-based step and 0.1.  Both candidates
        # go in ONE sequence: a second positional argument to np.min /
        # np.amin is the axis, so passing 1e-1 there raises.
        minstep = np.amin(((10**-np.floor(np.log10(f_subject))), 1e-1))
        manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
        problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
        w[subject] = np.array(solver.solve(
            problem, x=w[subject].astype(theano.config.floatX)))

        # Drop the per-subject graph objects before the next iteration.
        if data_sup[subject] is not None:
            del f2
            del log_p_y_given_x
            del data_sup_subject
            del labels_S
        del solver
        del problem
        del manifold
        del f
        del f1
        del data_srm_subject
        del w_th
    del theta_th
    del bias_th
    del s_th

    # Run garbage collector to avoid filling up the memory
    gc.collect()
    return w
import theano.tensor as T
import theano.compile.sharedvalue as S
import numpy as np
from pymanopt import Problem
from pymanopt.manifolds import Euclidean
from pymanopt.solvers import TrustRegions

if __name__ == "__main__":
    # Symbolic weights; the data lives in shared variables so that it
    # can be swapped without rebuilding the cost graph.  The (1, 1)
    # arrays are placeholders that are overwritten in the loop below.
    wT = T.matrix()
    yT = S.shared(np.random.randn(1, 1))
    XT = S.shared(np.random.randn(1, 1))

    # Cost function is the squared reconstruction error
    residual = yT - wT.T.dot(XT)
    cost = T.sum(residual**2)

    # A solver that involves the hessian
    solver = TrustRegions()

    # R^3
    manifold = Euclidean(3, 1)

    # Create the problem with extra cost function arguments
    problem = Problem(manifold=manifold, cost=cost, arg=wT, verbosity=0)

    # Solve 5 instances of the same type of problem for different data input
    for k in range(5):
        # Generate random data and load it into the shared variables
        X = np.random.randn(3, 200)
        Y = np.random.randn(1, 200)
        yT.set_value(Y)
        XT.set_value(X)
def badfunc():
    """Call ``shared`` with the keyword argument ``bad_kw``.

    Presumably ``bad_kw`` is not a keyword ``shared`` accepts, so the
    call is expected to fail -- confirm against the caller.
    """
    shared(7, bad_kw=False)
def test_err_symbolic_variable(self):
    """``shared`` rejects a symbolic expression but accepts an ndarray."""
    symbolic_ones = theano.tensor.ones((2, 3))
    self.assertRaises(TypeError, shared, symbolic_ones)

    # A concrete numpy array is a valid initial value.
    concrete_ones = numpy.ones((2, 4))
    shared(concrete_ones)
def test_no_default_updates(self):
    """Check how ``no_default_updates`` controls default updates.

    ``x`` carries the default update ``x + 2``.  The cases below run in
    order against the same ``x``, so each expected value depends on all
    the calls made before it.
    """
    x = shared(0)
    y = shared(1)
    x.default_update = x + 2

    # Test that the default update is taken into account in the right
    # cases: (no_default_updates, value of x after one call).
    default_only_cases = [
        (True, 0),
        ([x], 0),
        ([x, y], 0),
        ([y], 2),
        ([], 4),
        (False, 6),
    ]
    for no_default, expected in default_only_cases:
        fn = pfunc([], [x], no_default_updates=no_default)
        fn()
        assert x.get_value() == expected, (no_default, expected)

    # Anything that is neither a bool nor a list must be rejected.
    # The tuple needs its trailing comma: ``(x)`` is just ``x`` and
    # would merely repeat the bare-variable case.
    for invalid in ((x,), x, "canard"):
        with pytest.raises(TypeError):
            pfunc([], [x], no_default_updates=invalid)

    # Mix explicit updates and no_default_updates: the explicit update
    # ``x - 1`` always applies, whatever the flag says.
    explicit_update_cases = [
        (True, 5),
        ([x], 4),
        ([x, y], 3),
        ([y], 2),
        ([], 1),
        (False, 0),
    ]
    for no_default, expected in explicit_update_cases:
        fn = pfunc([], [x], updates=[(x, (x - 1))],
                   no_default_updates=no_default)
        fn()
        assert x.get_value() == expected, (no_default, expected)
def _update_classifier(self, data, labels, w, classes):
    """Update the classifier parameters theta and bias

    The data of all subjects is stacked, a weighted multinomial
    logistic-regression cost with an L2 penalty on theta is built in
    Theano, and the cost is minimised with a conjugate-gradient solver
    over a product of two Euclidean manifolds.

    Parameters
    ----------
    data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject for
        the classification task.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of 2D array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    classes : int
        The number of classes in the classifier.

    Returns
    -------
    theta : array, shape=[features, classes]
        The MLR parameter for the class planes.

    bias : array shape=[classes,]
        The MLR parameter for class biases.
    """
    # Stack the data and labels for training the classifier
    stacked = SSSRM._stack_list(data, labels, w)
    data_stacked, labels_stacked, weights = stacked

    features = w[0].shape[1]
    total_samples = weights.size

    # Constants of the problem, wrapped as shared variables.
    data_th = S.shared(data_stacked.astype(theano.config.floatX))
    val_ = S.shared(labels_stacked)
    total_samples_S = S.shared(total_samples)

    # Symbolic unknowns.
    theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
    bias_th = T.col(name='bias', dtype=theano.config.floatX)

    constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
    weights_th = S.shared(weights)

    # Log-probability of every class for every sample.
    scores = (theta_th.T.dot(data_th.T)).T + bias_th.T
    log_p_y_given_x = T.log(T.nnet.softmax(scores))

    # Weighted log-likelihood of the true labels plus the L2 penalty.
    true_label_log_p = log_p_y_given_x[T.arange(total_samples_S), val_]
    data_term = -constf2 * T.sum(true_label_log_p / weights_th)
    penalty = 0.5 * T.sum(theta_th ** 2)
    f = data_term + penalty

    manifold = Product((Euclidean(features, classes),
                        Euclidean(classes, 1)))
    problem = Problem(manifold=manifold, cost=f, arg=[theta_th, bias_th],
                      verbosity=0)
    solver = ConjugateGradient(mingradnorm=1e-6)
    solution = solver.solve(problem)
    theta, bias = solution[0], solution[1]

    del constf2, theta_th, bias_th, data_th, val_, solver, solution

    return theta, bias
import theano.tensor as T
import theano.compile.sharedvalue as S
import numpy as np
from pymanopt import Problem
from pymanopt.manifolds import Euclidean
from pymanopt.solvers import TrustRegions

if __name__ == "__main__":
    # Symbolic weights; the data lives in shared variables so that it
    # can be replaced without rebuilding the cost graph.  The (1, 1)
    # arrays are placeholders that are overwritten in the loop below.
    wT = T.matrix()
    yT = S.shared(np.random.randn(1, 1))
    XT = S.shared(np.random.randn(1, 1))

    # Cost function is the squared reconstruction error
    cost = T.sum((yT - wT.T.dot(XT))**2)

    # A solver that involves the hessian
    solver = TrustRegions()

    # R^3
    manifold = Euclidean(3, 1)

    # Create the problem with extra cost function arguments
    problem = Problem(manifold=manifold, cost=cost, arg=wT, verbosity=0)

    # Solve 5 instances of the same type of problem for different data input
    for k in range(0, 5):
        # Generate random data
        X = np.random.randn(3, 200)
        Y = np.random.randn(1, 200)

        # Load BOTH arrays into the shared variables.  Previously X was
        # generated but never stored, leaving XT at its (1, 1)
        # placeholder, whose shape does not match the (3, 1) weights.
        yT.set_value(Y)
        XT.set_value(X)
def badfunc():
    # Pass the keyword ``bad_kw`` to ``shared``; presumably it is not a
    # supported keyword, so this call is meant to fail -- confirm
    # against the caller.
    unsupported_kwargs = {'bad_kw': False}
    shared(7, **unsupported_kwargs)
def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
    """Update the mappings `W_i` of every subject, one subject at a time.

    Parameters
    ----------
    data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
        Each element in the list contains the fMRI data for alignment of
        one subject. There are n_align samples for each subject.

    data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject for
        the classification task. An element may be None, in which case
        only the alignment term is used for that subject.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    s : array, shape=[features, samples]
        The shared response.

    theta : array, shape=[classes, features]
        The MLR class plane parameters.

    bias : array, shape=[classes]
        The MLR class biases.

    Returns
    -------
    w : list of 2D array, element i has shape=[voxels_i, features]
        The updated orthogonal transforms (mappings). The input list is
        modified in place and returned.
    """
    float_x = theano.config.floatX

    # Quantities shared by every subject's cost.
    shared_response = S.shared(s.astype(float_x))
    class_planes = S.shared(theta.T.astype(float_x))
    class_biases = S.shared(bias.T.astype(float_x),
                            broadcastable=(True, False))

    for subject in range(len(data_align)):
        logger.info('Subject Wi %d' % subject)

        align_data = data_align[subject]
        sup_data = data_sup[subject]
        supervised = sup_data is not None

        # Alignment (reconstruction) term of the cost.
        mapping = T.matrix(name='W', dtype=float_x)
        align_th = S.shared(align_data.astype(float_x))
        align_scale = S.shared(
            (1 - self.alpha) * 0.5 / align_data.shape[1],
            allow_downcast=True)
        residual = align_th - mapping.dot(shared_response)
        alignment_cost = align_scale * T.sum(residual**2)

        if supervised:
            # Classification (log-likelihood) term of the cost.
            n_sup = S.shared(sup_data.shape[1])
            sup_th = S.shared(sup_data.astype(float_x))
            labels_th = S.shared(labels[subject])
            sup_scale = S.shared(
                -self.alpha / self.gamma / sup_data.shape[1],
                allow_downcast=True)

            projected = mapping.T.dot(sup_th)
            scores = (class_planes.dot(projected)).T + class_biases
            log_prob = T.log(T.nnet.softmax(scores))
            picked = log_prob[T.arange(n_sup), labels_th]
            supervised_cost = sup_scale * T.sum(picked)
            cost = alignment_cost + supervised_cost
        else:
            cost = alignment_cost

        # Define the problem and solve
        current_value = self._objective_function_subject(
            align_data, sup_data, labels[subject], w[subject],
            s, theta, bias)
        # Smaller of the magnitude-based step and 0.1.
        min_step = np.amin(
            ((10**-np.floor(np.log10(current_value))), 1e-1))
        manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
        problem = Problem(manifold=manifold, cost=cost, arg=mapping,
                          verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=min_step)
        w[subject] = np.array(
            solver.solve(problem, x=w[subject].astype(float_x)))

        # Drop the per-subject graph objects before the next iteration.
        if supervised:
            del supervised_cost, picked, log_prob, scores, projected
            del sup_th, labels_th
        del solver, problem, manifold
        del cost, alignment_cost, residual
        del align_th, mapping

    del class_planes, class_biases, shared_response

    # Run garbage collector to avoid filling up the memory
    gc.collect()
    return w
def test_duplicate_updates(self):
    """Two update expressions for one shared variable raise ValueError."""
    x, y = dmatrices("x", "y")
    z = shared(np.ones((2, 3)))

    with pytest.raises(ValueError):
        # ``z`` appears twice as an update target.
        conflicting = [(z, (z + x + y)), (z, (z - x))]
        theano.function([x, y], [z], updates=conflicting)
def _update_classifier(self, data, labels, w, classes):
    """Update the classifier parameters theta and bias

    Parameters
    ----------
    data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject for
        the classification task.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of 2D array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    classes : int
        The number of classes in the classifier.

    Returns
    -------
    theta : array, shape=[features, classes]
        The MLR parameter for the class planes.

    bias : array shape=[classes,]
        The MLR parameter for class biases.
    """
    float_x = theano.config.floatX

    # Stack the data and labels for training the classifier
    data_stacked, labels_stacked, weights = SSSRM._stack_list(
        data, labels, w)
    n_features = w[0].shape[1]

    # Fixed inputs of the optimisation, as shared variables.
    samples_th = S.shared(data_stacked.astype(float_x))
    labels_th = S.shared(labels_stacked)
    n_samples_th = S.shared(weights.size)

    # Unknowns being optimised.
    planes = T.matrix(name='theta', dtype=float_x)
    offsets = T.col(name='bias', dtype=float_x)

    scale = S.shared(self.alpha / self.gamma, allow_downcast=True)
    sample_weights = S.shared(weights)

    log_prob = T.log(
        T.nnet.softmax((planes.T.dot(samples_th.T)).T + offsets.T))

    # Weighted negative log-likelihood plus an L2 penalty on the planes.
    weighted = log_prob[T.arange(n_samples_th), labels_th] / sample_weights
    objective = -scale * T.sum(weighted) + 0.5 * T.sum(planes**2)

    search_space = Product((Euclidean(n_features, classes),
                            Euclidean(classes, 1)))
    problem = Problem(manifold=search_space, cost=objective,
                      arg=[planes, offsets], verbosity=0)
    optimiser = ConjugateGradient(mingradnorm=1e-6)
    optimum = optimiser.solve(problem)

    theta = optimum[0]
    bias = optimum[1]

    del scale, planes, offsets, samples_th, labels_th, optimiser, optimum

    return theta, bias
def rebuild_collect_shared(outputs,
                           inputs=None,
                           replace=None,
                           updates=None,
                           rebuild_strict=True,
                           copy_inputs_over=True,
                           no_default_updates=False):
    """
    Function that allows replacing subgraphs of a computational
    graph.

    It returns a set of dictionaries and lists which collect (partial?)
    different information about shared variables. This info is required by
    `pfunc`.

    :type outputs: list of Theano Variables ( or Theano expressions)
    :param outputs: list of Theano variables or expressions representing the
                    outputs of the computational graph

    :type inputs: list of Theano Variables ( or Theano expressions)
    :param inputs: list of Theano variables or expressions representing the
                   inputs of the computational graph (or None)

    :type replace: dict
    :param replace: dictionary (or iterable of pairs) describing which
                    subgraphs should be replaced by what

    :type updates: dict
    :param updates: dictionary describing updates expressions for shared
                    variables

    :type rebuild_strict: bool
    :param rebuild_strict: flag, if true the type of all inputs should be
                           the same as the for the current node

    :type copy_inputs_over: bool
    :param copy_inputs_over: flag; if False it will clone inputs

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic update
                               on Variables. If False (default), perform
                               them all. Else, perform automatic updates
                               on all Variables that are neither in
                               "updates" nor in "no_default_updates".

    :returns: ``(input_variables, cloned_outputs,
              [clone_d, update_d, update_expr, shared_inputs])``

    :raises TypeError: for a non-Variable ``replace`` key, a shared
                       variable used as explicit input, a non-shared
                       update target, an update whose type differs from
                       its target, or an output that is neither a
                       Variable nor an Out
    :raises ValueError: when a shared variable receives two updates
    """
    if isinstance(outputs, tuple):
        outputs = list(outputs)

    # This function implements similar functionality as graph.clone
    # and it should be merged with that
    clone_d = {}
    update_d = {}
    update_expr = []
    # list of shared inputs that are used as inputs of the graph
    shared_inputs = []

    def clone_v_get_shared_updates(v, copy_inputs_over):
        '''
        Clones a variable and its inputs recursively until all are in
        clone_d. Also appends all shared variables met along the way to
        shared inputs, and their default_update (if applicable) to update_d
        and update_expr.

        v can have an env attached to it, case in which we want to clone
        constants ( to avoid having a constant belonging to two envs)
        '''
        # this co-recurses with clone_a
        assert v is not None
        if v in clone_d:
            return clone_d[v]
        if v.owner:
            clone_a(v.owner, copy_inputs_over)
            return clone_d.setdefault(v, v)
        elif isinstance(v, SharedVariable):
            if v not in shared_inputs:
                shared_inputs.append(v)
            if hasattr(v, 'default_update'):
                # Check that v should not be excluded from the default
                # updates list
                if (no_default_updates is False or
                        (isinstance(no_default_updates, list) and
                         v not in no_default_updates)):
                    # Do not use default_update if a "real" update was
                    # provided
                    if v not in update_d:
                        v_update = v.filter_update(v.default_update)
                        if v_update.type != v.type:
                            raise TypeError(
                                ('an update must have the same type as '
                                 'the original shared variable'),
                                (v, v.type, v_update, v_update.type))
                        update_d[v] = v_update
                        update_expr.append((v, v_update))
        if not copy_inputs_over or (isinstance(v, Constant) and
                                    hasattr(v, 'env')):
            # Cloning shared variables implies copying their underlying
            # memory buffer ?? No.
            return clone_d.setdefault(v, v.clone())
        else:
            return clone_d.setdefault(v, v)

    def clone_a(a, copy_inputs_over):
        '''
        Clones an apply node and its inputs recursively until all are in
        clone_d. It co-recurses with clone_v_get_shared_updates.
        '''
        if a is None:
            return None
        if a not in clone_d:
            for i in a.inputs:
                clone_v_get_shared_updates(i, copy_inputs_over)

            clone_d[a] = a.clone_with_new_inputs(
                [clone_d[i] for i in a.inputs], strict=rebuild_strict)
            for old_o, new_o in zip(a.outputs, clone_d[a].outputs):
                clone_d.setdefault(old_o, new_o)
        return clone_d[a]

    # initialize the clone_d mapping with the replace dictionary
    if replace is None:
        replace = []
    try:
        replace_pairs = replace.items()
    except Exception:
        # Not a mapping: treat it as an iterable of (orig, repl) pairs.
        replace_pairs = replace

    for v_orig, v_repl in replace_pairs:
        if not isinstance(v_orig, Variable):
            raise TypeError('given keys must be Variable', v_orig)
        if not isinstance(v_repl, Variable):
            v_repl = shared(v_repl)
        assert v_orig not in clone_d
        clone_d[v_orig] = clone_v_get_shared_updates(v_repl,
                                                     copy_inputs_over)

    if inputs is None:
        inputs = []

    def clone_inputs(i):
        if not copy_inputs_over:
            return clone_d.setdefault(i, i.clone())
        else:
            return clone_d.setdefault(i, i)

    input_variables = [clone_inputs(i) for i in inputs]

    # It was decided, as a first step, to prevent shared variables from
    # being used as function inputs. Although it is technically possible,
    # it is also not clear when/how to use the value of that shared
    # variable (is it a default? ignored?, if the shared variable changes,
    # does that function default also change?).
    #
    # An explicit loop is used so that the error names the offending
    # variable.  The previous form formatted the message with the loop
    # variable of a list comprehension, which on Python 2 is merely the
    # LAST input and on Python 3 is not bound at all.
    for input_variable in input_variables:
        if isinstance(input_variable, SharedVariable):
            raise TypeError(('Cannot use a shared variable (%s) as explicit '
                             'input. Consider substituting a non-shared'
                             ' variable via the `givens` parameter')
                            % input_variable)

    # Fill update_d and update_expr with provided updates
    if updates is None:
        updates = []
    for (store_into, update_val) in iter_over_pairs(updates):
        if not isinstance(store_into, SharedVariable):
            raise TypeError('update target must be a SharedVariable',
                            store_into)
        if store_into in update_d:
            raise ValueError(('this shared variable already has an update '
                              'expression'),
                             (store_into, update_d[store_into]))

        # typically this might be a cast()
        update_val = store_into.filter_update(update_val)
        if update_val.type != store_into.type:
            err_msg = ('an update must have the same type as the '
                       'original shared variable(dest, dest.type, '
                       'update_val, update_val.type)')
            err_arg = (store_into,
                       store_into.type,
                       update_val,
                       update_val.type)

            raise TypeError(err_msg, err_arg)
        update_d[store_into] = update_val
        update_expr.append((store_into, update_val))

    # Elements of "outputs" are here cloned to "cloned_outputs"
    if isinstance(outputs, list):
        cloned_outputs = []
        for v in outputs:
            if isinstance(v, Variable):
                cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
                cloned_outputs.append(cloned_v)
            elif isinstance(v, Out):
                cloned_v = clone_v_get_shared_updates(v.variable,
                                                      copy_inputs_over)
                cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
            else:
                raise TypeError(('outputs must be theano Variable or '
                                 'Out instances'), v)
    else:
        if isinstance(outputs, Variable):
            cloned_v = clone_v_get_shared_updates(outputs, copy_inputs_over)
            cloned_outputs = cloned_v
        elif isinstance(outputs, Out):
            cloned_v = clone_v_get_shared_updates(outputs.variable,
                                                  copy_inputs_over)
            cloned_outputs = Out(cloned_v, borrow=outputs.borrow)
        elif outputs is None:
            cloned_outputs = []  # TODO: get Function.__call__ to return None
        else:
            raise TypeError(('output must be a theano Variable or Out '
                             'instance (or list of them)'),
                            outputs)

    # Iterate over update_expr, cloning its elements, and updating
    # shared_inputs, update_d and update_expr from the SharedVariables
    # we discover.
    # If the variable to be updated is a shared variable not already
    # in shared_inputs, add it.
    # Note: we extend update_expr while iterating over it, hence the
    # index-based loop that re-reads the length on every step.
    i = 0
    while i < len(update_expr):
        v, v_update = update_expr[i]
        cloned_v_update = clone_v_get_shared_updates(v_update,
                                                     copy_inputs_over)
        update_d[v] = cloned_v_update
        if isinstance(v, SharedVariable) and v not in shared_inputs:
            shared_inputs.append(v)
        i += 1

    return (input_variables, cloned_outputs,
            [clone_d, update_d, update_expr, shared_inputs])
def rebuild_collect_shared(outputs,
                           inputs=None,
                           replace=None,
                           updates=None,
                           rebuild_strict=True,
                           copy_inputs_over=True,
                           no_default_updates=False,
                           ):
    """
    Function that allows replacing subgraphs of a computational graph.

    It returns a set of dictionaries and lists which collect (partial?)
    different information about shared variables. This info is required by
    `pfunc`.

    :type outputs: list of Theano Variables ( or Theano expressions)
    :param outputs: list of Theano variables or expressions representing the
                    outputs of the computational graph

    :type inputs: list of Theano Variables ( or Theano expressions)
    :param inputs: list of Theano variables or expressions representing the
                   inputs of the computational graph (or None)

    :type replace: dict
    :param replace: dictionary describing which subgraphs should be
                    replaced by what. orig_value => new_value
                    (an iterable of ``(orig_value, new_value)`` pairs is
                    accepted as well)

    :type updates: dict
    :param updates: dictionary describing updates expressions for shared
                    variables

    :type rebuild_strict: bool
    :param rebuild_strict: flag, if true the type of all inputs should be
                           the same as the for the current node

    :type copy_inputs_over: bool
    :param copy_inputs_over: flag; if False it will clone inputs

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic update
                               on Variables. If False (default), perform
                               them all. Else, perform automatic updates
                               on all Variables that are neither in
                               "updates" nor in "no_default_updates".

    :rtype: tuple
    :returns: ``(input_variables, cloned_outputs,
              [clone_d, update_d, update_expr, shared_inputs])``

    :raises TypeError: if a replacement key is not a Variable, a shared
                       variable is used as an explicit input, an update
                       target is not a SharedVariable, an update has the
                       wrong type, or an output is neither a Variable nor
                       an Out
    :raises ValueError: if a shared variable is given two update expressions
    :raises AssertionError: if a replaced variable was already used to
                            compute another replacement
    """

    if isinstance(outputs, tuple):
        outputs = list(outputs)

    # This function implements similar functionality as graph.clone
    # and it should be merged with that
    clone_d = {}
    update_d = {}
    update_expr = []
    # list of shared inputs that are used as inputs of the graph
    shared_inputs = []

    def clone_v_get_shared_updates(v, copy_inputs_over):
        '''
        Clones a variable and its inputs recursively until all are in clone_d.
        Also appends all shared variables met along the way to shared inputs,
        and their default_update (if applicable) to update_d and update_expr.

        v can have an fgraph attached to it, case in which we want to clone
        constants ( to avoid having a constant belonging to two fgraphs)
        '''
        # this co-recurses with clone_a
        assert v is not None
        if v in clone_d:
            return clone_d[v]
        if v.owner:
            clone_a(v.owner, copy_inputs_over)
            return clone_d.setdefault(v, v)
        elif isinstance(v, SharedVariable):
            if v not in shared_inputs:
                shared_inputs.append(v)
            if hasattr(v, 'default_update'):
                # Check that v should not be excluded from the default
                # updates list
                if (no_default_updates is False or
                    (isinstance(no_default_updates, list) and
                     v not in no_default_updates)):
                    # Do not use default_update if a "real" update was
                    # provided
                    if v not in update_d:
                        v_update = v.type.filter_variable(v.default_update,
                                                          allow_convert=False)
                        if v_update.type != v.type:
                            raise TypeError(
                                'an update must have the same type as '
                                'the original shared variable',
                                (v, v.type, v_update, v_update.type))
                        update_d[v] = v_update
                        update_expr.append((v, v_update))
        if not copy_inputs_over or (isinstance(v, Constant) and
                                    hasattr(v, 'fgraph')):
            # Cloning shared variables implies copying their underlying
            # memory buffer ?? No.
            return clone_d.setdefault(v, v.clone())
        else:
            return clone_d.setdefault(v, v)

    def clone_a(a, copy_inputs_over):
        '''
        Clones an apply node: first makes sure all of its inputs are in
        clone_d, then rebuilds the node from the cloned inputs and records
        its outputs in clone_d.

        It co-recurses with clone_v_get_shared_updates
        '''
        if a is None:
            return None
        if a not in clone_d:
            for i in a.inputs:
                clone_v_get_shared_updates(i, copy_inputs_over)

            clone_d[a] = a.clone_with_new_inputs([clone_d[i] for i in
                                                  a.inputs],
                                                 strict=rebuild_strict)
            for old_o, new_o in zip(a.outputs, clone_d[a].outputs):
                clone_d.setdefault(old_o, new_o)
        return clone_d[a]

    # initialize the clone_d mapping with the replace dictionary
    if replace is None:
        replace = []
    try:
        replace_pairs = list(replace.items())
    except AttributeError:
        # Not a mapping: assume an iterable of (old, new) pairs. Only the
        # missing ``items`` attribute is treated as "not a dict", so that
        # unrelated errors are not silently swallowed.
        replace_pairs = replace

    for v_orig, v_repl in replace_pairs:
        if not isinstance(v_orig, Variable):
            raise TypeError('given keys must be Variable', v_orig)
        if not isinstance(v_repl, Variable):
            v_repl = shared(v_repl)

        if v_orig in clone_d:
            raise AssertionError(
                "When using 'givens' or 'replace' with several "
                "(old_v, new_v) replacement pairs, you can not have a "
                "new_v variable depend on an old_v one. For instance, "
                "givens = {a:b, b:(a+1)} is not allowed. Here, the old_v "
                "%s is used to compute other new_v's, but it is scheduled "
                "to be replaced by %s." % (v_orig, v_repl))

        clone_d[v_orig] = clone_v_get_shared_updates(v_repl,
                                                     copy_inputs_over)

    if inputs is None:
        inputs = []

    def clone_inputs(i):
        # Explicit inputs are cloned only when copy_inputs_over is False.
        if not copy_inputs_over:
            return clone_d.setdefault(i, i.clone())
        else:
            return clone_d.setdefault(i, i)

    input_variables = [clone_inputs(i) for i in inputs]

    # It was decided, as a first step, to prevent shared variables from
    # being used as function inputs. Although it is technically possible,
    # it is also not clear when/how to use the value of that shared
    # variable (is it a default? ignored?, if the shared variable changes,
    # does that function default also change?).
    for v in input_variables:
        if isinstance(v, SharedVariable):
            raise TypeError(('Cannot use a shared variable (%s) as explicit '
                             'input. Consider substituting a non-shared'
                             ' variable via the `givens` parameter') % v)

    # Fill update_d and update_expr with provided updates
    if updates is None:
        updates = []
    for (store_into, update_val) in iter_over_pairs(updates):
        if not isinstance(store_into, SharedVariable):
            raise TypeError('update target must be a SharedVariable',
                            store_into)
        if store_into in update_d:
            raise ValueError('this shared variable already has an update '
                             'expression',
                             (store_into, update_d[store_into]))

        # filter_variable ensure smooth conversion of cpu/gpu Types
        try:
            update_val = store_into.type.filter_variable(update_val,
                                                         allow_convert=False)
        except TypeError:
            err_msg = ('An update must have the same type as the'
                       ' original shared variable (shared_var=%s,'
                       ' shared_var.type=%s,'
                       ' update_val=%s, update_val.type=%s).' % (
                           store_into,
                           store_into.type,
                           update_val,
                           update_val.type))
            err_sug = ('If the difference is related to the broadcast pattern,'
                       ' you can call the'
                       ' tensor.unbroadcast(var, axis_to_unbroadcast[, ...])'
                       ' function to remove broadcastable dimensions.')

            raise TypeError(err_msg, err_sug)
        assert update_val.type == store_into.type

        update_d[store_into] = update_val
        update_expr.append((store_into, update_val))

    # Elements of "outputs" are here cloned to "cloned_outputs"
    if isinstance(outputs, list):
        cloned_outputs = []
        for v in outputs:
            if isinstance(v, Variable):
                cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
                cloned_outputs.append(cloned_v)
            elif isinstance(v, Out):
                cloned_v = clone_v_get_shared_updates(v.variable,
                                                      copy_inputs_over)
                cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
            else:
                raise TypeError('Outputs must be theano Variable or '
                                'Out instances. Received ' + str(v) +
                                ' of type ' + str(type(v)))
            # computed_list.append(cloned_v)
    else:
        if isinstance(outputs, Variable):
            cloned_v = clone_v_get_shared_updates(outputs, copy_inputs_over)
            cloned_outputs = cloned_v
            # computed_list.append(cloned_v)
        elif isinstance(outputs, Out):
            cloned_v = clone_v_get_shared_updates(outputs.variable,
                                                  copy_inputs_over)
            cloned_outputs = Out(cloned_v, borrow=outputs.borrow)
            # computed_list.append(cloned_v)
        elif outputs is None:
            cloned_outputs = []  # TODO: get Function.__call__ to return None
        else:
            raise TypeError('output must be a theano Variable or Out '
                            'instance (or list of them)',
                            outputs)

    # Iterate over update_expr, cloning its elements, and updating
    # shared_inputs, update_d and update_expr from the SharedVariables
    # we discover.
    # If the variable to be updated is a shared variable not already
    # in shared_inputs, add it.
    # Note: we extend update_expr while iterating over it.

    i = 0
    while i < len(update_expr):
        v, v_update = update_expr[i]
        cloned_v_update = clone_v_get_shared_updates(v_update,
                                                     copy_inputs_over)
        update_d[v] = cloned_v_update
        if isinstance(v, SharedVariable) and v not in shared_inputs:
            shared_inputs.append(v)
        i += 1

    return (input_variables, cloned_outputs,
            [clone_d, update_d, update_expr, shared_inputs])
from keras.optimizers import SGD
from keras.utils import np_utils

# Training-script configuration constants.
img_rows = 512
img_cols = 512
nb_epoch = 1000
iteration_size = 100000
mini_batch_size = 12
# Side length used when reshaping each per-sample feature map in fcrn_loss.
delta = 16
initial_discount = 0.01
discount_step = 0.1
num_samples_per_epoch = 50000
num_validation_samples = 5000

# Shared variable holding the discount used for non-positive 'c' entries.
d = shared(initial_discount, name = 'd')

def fcrn_loss(y_true, y_pred):
    """Build per-sample weighted squared-error terms for a mini-batch.

    NOTE(review): only the beginning of this function is visible in this
    snippet (there is no return statement here), so the final reduction of
    the collected terms is not documented.

    Both arguments are indexed as ``[sample, channel, row, col]``; channel 6
    of ``y_true`` is treated as the 'c' weighting map and channels 0-5 are
    the maps being weighted.
    """
    # Element-wise squared error between prediction and target.
    loss = K.square(y_pred - y_true)
    images = []
    # Iterates exactly mini_batch_size samples; assumes the batch really has
    # that many entries -- TODO confirm for the last (possibly short) batch.
    for i in range(0, mini_batch_size):
        c = y_true[i, 6, :,:].reshape((1, delta, delta))   # The last feature map in the true vals is the 'c' matrix
        # Loss on the 'c' channel, weighted by the *un-discounted* c map
        # (computed before c is overwritten on the next line).
        final_c = (c * loss[i,6,:,:])
        # Replace every non-positive entry of c by the discount value.
        # NOTE(review): d.get_value() reads the shared variable's current
        # value here, so later d.set_value() calls presumably do not affect
        # an already-built loss graph -- confirm this is intended.
        c = T.set_subtensor(c[(c<=0.0).nonzero()], d.get_value())
        # Element-wise multiply of the c feature map against all feature maps in the loss
        final_loss_parts = [(c * loss[i, j, :, :].reshape((1, delta, delta))).reshape((1, delta, delta)) for j in range(0, 6)]
def test_err_symbolic_variable(self):
    """shared() raises TypeError for a symbolic tensor but takes an ndarray."""
    with pytest.raises(TypeError):
        symbolic_value = theano.tensor.ones((2, 3))
        shared(symbolic_value)
    # A concrete numpy array must be accepted without raising.
    concrete_value = np.ones((2, 4))
    shared(concrete_value)
def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
          no_default_updates=False, accept_inplace=False, name=None,
          rebuild_strict=True, allow_input_downcast=None,
          profile=None, on_unused_input=None):
    """Function-constructor for graphs with shared variables.

    :type params: list of either Variable or Param instances.
    :param params: function parameters, these are not allowed to be shared
        variables

    :type outputs: list of Variables or Out instances
    :param outputs: expressions to compute

    :type mode: string or `theano.compile.Mode` instance.
    :param mode: compilation mode

    :type updates: iterable over pairs (shared_variable, new_expression).
        List, tuple or dict.
    :param updates: update the values for SharedVariable inputs according to
        these expressions

    :type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple
        or dict.  The Var1 and Var2 in each pair must have the same Type.
    :param givens: specific substitutions to make in the computation graph
        (Var2 replaces Var1).

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic update
        on Variables. If False (default), perform them all. Else, perform
        automatic updates on all Variables that are neither in "updates" nor
        in "no_default_updates".

    :type name: None or string
    :param name: attaches a name to the profiling result of this function.

    :type allow_input_downcast: Boolean
    :param allow_input_downcast: True means that the values passed as inputs
        when calling the function can be silently downcasted to fit the dtype
        of the corresponding Variable, which may lose precision. False means
        that it will only be cast to a more general, or precise, type. None
        (default) is almost like False, but allows downcasting of Python
        float scalars to floatX.

    :type profile: None, True, str, or ProfileStats instance
    :param profile: accumulate profiling information into a given
        ProfileStats instance. None is the default, and means to use the
        value of config.profile. If argument is `True` then a new
        ProfileStats instance will be used. If argument is a string, a new
        ProfileStats instance will be created with that string as its
        `message` attribute. This profiling object will be available via
        self.profile.

    :type on_unused_input: str
    :param on_unused_input: What to do if a variable in the 'inputs' list is
        not used in the graph. Possible values are 'raise', 'warn', 'ignore'
        and None.

    :rtype: theano.compile.Function
    :returns: a callable object that will compute the outputs (given the
        inputs) and update the implicit function arguments according to the
        `updates`.

    :raises Exception: if `params` is not a list or a tuple
    :raises TypeError: if `no_default_updates` is neither a bool nor a list
    :raises ValueError: if `updates` is not made of 2-element pairs
    :raises UnusedInputError: if a variable appears twice in `params`
    :raises RuntimeError: if `givens` tries to replace an explicit input

    :note: Regarding givens: Be careful to make sure that these
        substitutions are independent--behaviour when Var1 of one pair
        appears in the graph leading to Var2 in another expression is
        undefined.  Replacements specified with givens are different from
        optimizations in that Var2 is not expected to be equivalent to Var1.

    """
    #
    # This function works by cloning the graph (except for the inputs), and
    # then shipping it off to compile.function
    # (There it will be cloned again, unnecessarily, because it doesn't know
    # that we already cloned it.)
    #
    # First, it clones the replacements named in the givens argument, and
    # points each Var1 to the clone of Var2.
    # Then it sets the inputs in the clone dictionary.
    # After these steps, we are assuming that the clone dictionary contains
    # all the inputs to the computation graph.
    #
    # Then it clones the outputs and the update expressions.  This rebuilds
    # a computation graph from the inputs and the givens.
    #
    if updates is None:
        updates = []
    if givens is None:
        givens = []
    if profile is None:
        profile = config.profile
        # profile -> True or False
    # Equality (not identity) is kept on purpose so truthy-equal flags such
    # as 1 keep behaving as they did before.
    if profile == True:  # noqa: E712
        profile = ProfileStats(message=name)
        # profile -> object
    if isinstance(profile, str):
        profile = ProfileStats(message=profile)
    # profile is typically either False or an object at this point.
    # No need to block other objects being passed through though. It might be
    # useful.

    if not isinstance(params, (list, tuple)):
        raise Exception("in pfunc() the first argument must be a list or a tuple")

    if not isinstance(no_default_updates, bool)\
            and not isinstance(no_default_updates, list):
        raise TypeError("no_default_update should be either a boolean or a list")

    if len(updates) > 0 and any(isinstance(v, Variable)
                                for v in iter_over_pairs(updates)):
        raise ValueError(
            "The updates parameter must be an OrderedDict/dict or a list of "
            "lists/tuples with 2 elements")

    # transform params into theano.compile.In objects.
    inputs = [_pfunc_param_to_in(p, allow_downcast=allow_input_downcast)
              for p in params]

    # Check if some variable is present more than once in inputs
    in_variables = [in_obj.variable for in_obj in inputs]
    for i, v in enumerate(in_variables):
        if v in in_variables[(i + 1):]:
            dup_v_i = in_variables.index(v, (i + 1))
            raise UnusedInputError(
                ("Variable %s is used twice in inputs to theano.function, "
                 "at indices %i and %i.  This would result in values "
                 "provided for it being ignored. Please do not duplicate "
                 "variables in the inputs list." % (v, i, dup_v_i)))

    # Check that we are not using `givens` to replace input variables, because
    # this typically does nothing, contrary to what one may expect.
    in_var_set = set(in_variables)
    try:
        givens_pairs = givens.items()
    except AttributeError:
        givens_pairs = givens
    for x, y in givens_pairs:
        if x in in_var_set:
            raise RuntimeError(
                'You are trying to replace variable \'%s\' through the '
                '`givens` parameter, but this variable is an input to your '
                'function. Replacing inputs is currently forbidden because it '
                'has no effect. One way to modify an input `x` to a function '
                'evaluating f(x) is to define a new input `y` and use '
                '`theano.function([y], f(x), givens={x: g(y)})`. Another '
                'solution consists in using `theano.clone`, e.g. like this: '
                '`theano.function([x], '
                'theano.clone(f(x), replace={x: g(x)}))`.'
                % x)

    mode = theano.compile.mode.get_mode(mode)

    # Extract TensorSharedVariables
    # NOTE(review): when the caller passes a dict for `givens` it is NOT
    # copied here, so the `givens[sv] = clone` assignments below are visible
    # to the caller. Likewise `updates` is edited in place further down.
    if not isinstance(givens, dict):
        givens = dict(givens)

    if outputs is None:
        o = []
    elif isinstance(outputs, (theano.gof.Variable, Out)):
        o = [outputs]
    else:
        o = list(outputs)
    for oo in (  # Check in the updates keys or values have inputs
               [x for x, y in iter_over_pairs(updates)] +
               [y for x, y in iter_over_pairs(updates)]):
        if isinstance(oo, Out):
            oo = oo.variable
        elif not isinstance(oo, Variable):
            oo = shared(oo)
        o.append(oo)

    inp = theano.gof.graph.inputs(o, blockers=[i.variable for i in inputs])
    shared_inputs = [v for v in inp
                     if isinstance(v,
                                   theano.tensor.sharedvar.TensorSharedVariable)
                     and (not v.force_type)
                     and isinstance(v.type, theano.tensor.TensorType)]

    # Do we want to make the inner type of TensorSharedVariable
    # resize on GPU.
    gpu = (isinstance(mode.provided_optimizer, theano.gof.Query) and
           "gpu" in mode.provided_optimizer.include)
    # TODO check for collision with givens
    for sv in shared_inputs:
        # clone is a FunctionTensorSharedVariable pointing to
        # original. It is transfered to GPU or CPU if gpu is True or
        # false, respectively.
        clone = sv.functionClone(gpu)
        # clone should have same type as original
        if sv._isCudaType(sv.type):
            clone = clone._as_CudaNdarrayVariable()
        else:
            clone = clone._as_TensorVariable()
        if sv in givens:
            # A debugger breakpoint used to live here; in library code it
            # blocks or aborts non-interactive runs. Keep the behaviour of
            # "continuing" from it (the user's replacement wins) but make
            # the unresolved collision visible.
            import warnings
            warnings.warn(
                "pfunc: shared variable %s is already replaced through "
                "`givens`; keeping the user-supplied replacement instead of "
                "its function-local clone." % (sv,))
        else:
            givens[sv] = clone
        # NOTE(review): `updates[sv]` indexing only works for mapping-style
        # updates; for a list of pairs `sv in updates` compares sv against
        # the pairs themselves -- confirm list-style updates are handled.
        if sv in updates:
            repl = updates[sv]
            del updates[sv]
            updates[clone] = repl

    output_vars = rebuild_collect_shared(outputs,
                                         in_variables,
                                         replace=givens,
                                         updates=updates,
                                         rebuild_strict=rebuild_strict,
                                         copy_inputs_over=True,
                                         no_default_updates=no_default_updates)
    # extracting the arguments
    input_variables, cloned_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff

    for i, iv in zip(inputs, input_variables):
        i.variable = iv

    for sv in shared_inputs:
        # pass value of None here
        # value will be stored in the resulting functions' defaults list
        # but since the value of shared variables never needs to be refed,
        # it is not needed
        if sv in update_d:
            si = In(variable=sv, value=sv.container, mutable=True,
                    borrow=True, update=update_d[sv], shared=True)
        else:
            si = In(variable=sv, value=sv.container, mutable=False,
                    borrow=True, shared=True)
        inputs.append(si)

    return orig_function(inputs, cloned_outputs, mode,
                         accept_inplace=accept_inplace, name=name,
                         profile=profile, on_unused_input=on_unused_input)
def test_err_symbolic_variable(self):
    """shared() raises TypeError for a symbolic tensor but takes an ndarray."""
    # Build the symbolic argument first so only the shared() call itself is
    # expected to raise.
    symbolic_value = theano.tensor.ones((2, 3))
    with self.assertRaises(TypeError):
        shared(symbolic_value)
    # A concrete numpy array must be accepted without raising.
    concrete_value = np.ones((2, 4))
    shared(concrete_value)
def rebuild_collect_shared(outputs,
                           inputs=None,
                           replace=None,
                           updates=None,
                           rebuild_strict=True,
                           copy_inputs_over=True,
                           no_default_updates=False,
                           ):
    """
    Function that allows replacing subgraphs of a computational graph.

    It returns a set of dictionaries and lists which collect (partial?)
    different information about shared variables. This info is required by
    `pfunc`.

    Parameters
    ----------
    outputs : list of Theano Variables (or Theano expressions)
        List of Theano variables or expressions representing the outputs of
        the computational graph.
    inputs : list of Theano Variables (or Theano expressions)
        List of Theano variables or expressions representing the inputs of
        the computational graph (or None).
    replace : dict
        Dictionary describing which subgraphs should be replaced by what.
        orig_value => new_value
        An iterable of ``(orig_value, new_value)`` pairs is accepted as well.
    updates : dict
        Dictionary describing updates expressions for shared variables.
    rebuild_strict : bool
        Flag, if true the type of all inputs should be the same as the one for
        the current node.
    copy_inputs_over : bool
        Flag; if False it will clone inputs.
    no_default_updates : either bool or list of Variables
        If True, do not perform any automatic update on Variables.
        If False (default), perform them all.
        Else, perform automatic updates on all Variables that are neither in
        "updates" nor in "no_default_updates".

    Returns
    -------
    tuple
        ``(input_variables, cloned_outputs,
        [clone_d, update_d, update_expr, shared_inputs])``.

    Raises
    ------
    TypeError
        If a replacement key is not a Variable, a shared variable is used as
        an explicit input, an update target is not a SharedVariable, an
        update has the wrong type, or an output is neither a Variable nor an
        Out.
    ValueError
        If a shared variable is given two update expressions.
    AssertionError
        If a replaced variable was already used to compute another
        replacement.

    """

    if isinstance(outputs, tuple):
        outputs = list(outputs)

    # This function implements similar functionality as graph.clone
    # and it should be merged with that
    clone_d = {}
    update_d = {}
    update_expr = []
    # list of shared inputs that are used as inputs of the graph
    shared_inputs = []

    def clone_v_get_shared_updates(v, copy_inputs_over):
        """
        Clones a variable and its inputs recursively until all are in clone_d.
        Also appends all shared variables met along the way to shared inputs,
        and their default_update (if applicable) to update_d and update_expr.

        v can have an fgraph attached to it, case in which we want to clone
        constants (to avoid having a constant belonging to two fgraphs).

        """
        # this co-recurses with clone_a
        assert v is not None
        if v in clone_d:
            return clone_d[v]
        if v.owner:
            clone_a(v.owner, copy_inputs_over)
            return clone_d.setdefault(v, v)
        elif isinstance(v, SharedVariable):
            if v not in shared_inputs:
                shared_inputs.append(v)
            if hasattr(v, 'default_update'):
                # Check that v should not be excluded from the default
                # updates list
                if (no_default_updates is False or
                    (isinstance(no_default_updates, list) and
                     v not in no_default_updates)):
                    # Do not use default_update if a "real" update was
                    # provided
                    if v not in update_d:
                        v_update = v.type.filter_variable(v.default_update,
                                                          allow_convert=False)
                        if v_update.type != v.type:
                            raise TypeError(
                                'an update must have the same type as '
                                'the original shared variable',
                                (v, v.type, v_update, v_update.type))
                        update_d[v] = v_update
                        update_expr.append((v, v_update))
        if not copy_inputs_over or (isinstance(v, Constant) and
                                    hasattr(v, 'fgraph')):
            # Cloning shared variables implies copying their underlying
            # memory buffer ?? No.
            return clone_d.setdefault(v, v.clone())
        else:
            return clone_d.setdefault(v, v)

    def clone_a(a, copy_inputs_over):
        """
        Clones an apply node: first makes sure all of its inputs are in
        clone_d, then rebuilds the node from the cloned inputs and records
        its outputs in clone_d.

        It co-recurses with clone_v_get_shared_updates.

        """
        if a is None:
            return None
        if a not in clone_d:
            for i in a.inputs:
                clone_v_get_shared_updates(i, copy_inputs_over)

            clone_d[a] = a.clone_with_new_inputs([clone_d[i] for i in
                                                  a.inputs],
                                                 strict=rebuild_strict)
            for old_o, new_o in zip(a.outputs, clone_d[a].outputs):
                clone_d.setdefault(old_o, new_o)
        return clone_d[a]

    # initialize the clone_d mapping with the replace dictionary
    if replace is None:
        replace = []
    try:
        replace_pairs = list(replace.items())
    except AttributeError:
        # Not a mapping: assume an iterable of (old, new) pairs. Only the
        # missing ``items`` attribute is treated as "not a dict", so that
        # unrelated errors are not silently swallowed.
        replace_pairs = replace

    for v_orig, v_repl in replace_pairs:
        if not isinstance(v_orig, Variable):
            raise TypeError('given keys must be Variable', v_orig)
        if not isinstance(v_repl, Variable):
            v_repl = shared(v_repl)

        if v_orig in clone_d:
            raise AssertionError(
                "When using 'givens' or 'replace' with several "
                "(old_v, new_v) replacement pairs, you can not have a "
                "new_v variable depend on an old_v one. For instance, "
                "givens = {a:b, b:(a+1)} is not allowed. Here, the old_v "
                "%s is used to compute other new_v's, but it is scheduled "
                "to be replaced by %s." % (v_orig, v_repl))

        clone_d[v_orig] = clone_v_get_shared_updates(v_repl,
                                                     copy_inputs_over)

    if inputs is None:
        inputs = []

    def clone_inputs(i):
        # Explicit inputs are cloned only when copy_inputs_over is False.
        if not copy_inputs_over:
            return clone_d.setdefault(i, i.clone())
        else:
            return clone_d.setdefault(i, i)

    input_variables = [clone_inputs(i) for i in inputs]

    # It was decided, as a first step, to prevent shared variables from
    # being used as function inputs. Although it is technically possible,
    # it is also not clear when/how to use the value of that shared
    # variable (is it a default? ignored?, if the shared variable changes,
    # does that function default also change?).
    for v in input_variables:
        if isinstance(v, SharedVariable):
            raise TypeError(('Cannot use a shared variable (%s) as explicit '
                             'input. Consider substituting a non-shared'
                             ' variable via the `givens` parameter') % v)

    # Fill update_d and update_expr with provided updates
    if updates is None:
        updates = []
    for (store_into, update_val) in iter_over_pairs(updates):
        if not isinstance(store_into, SharedVariable):
            raise TypeError('update target must be a SharedVariable',
                            store_into)
        if store_into in update_d:
            raise ValueError('this shared variable already has an update '
                             'expression',
                             (store_into, update_d[store_into]))

        # filter_variable ensure smooth conversion of cpu/gpu Types
        try:
            update_val = store_into.type.filter_variable(update_val,
                                                         allow_convert=False)
        except TypeError:
            err_msg = ('An update must have the same type as the'
                       ' original shared variable (shared_var=%s,'
                       ' shared_var.type=%s,'
                       ' update_val=%s, update_val.type=%s).' % (
                           store_into,
                           store_into.type,
                           update_val,
                           update_val.type))
            err_sug = ('If the difference is related to the broadcast pattern,'
                       ' you can call the'
                       ' tensor.unbroadcast(var, axis_to_unbroadcast[, ...])'
                       ' function to remove broadcastable dimensions.')

            raise TypeError(err_msg, err_sug)
        assert update_val.type == store_into.type

        update_d[store_into] = update_val
        update_expr.append((store_into, update_val))

    # Elements of "outputs" are here cloned to "cloned_outputs"
    if isinstance(outputs, list):
        cloned_outputs = []
        for v in outputs:
            if isinstance(v, Variable):
                cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
                cloned_outputs.append(cloned_v)
            elif isinstance(v, Out):
                cloned_v = clone_v_get_shared_updates(v.variable,
                                                      copy_inputs_over)
                cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
            else:
                raise TypeError('Outputs must be theano Variable or '
                                'Out instances. Received ' + str(v) +
                                ' of type ' + str(type(v)))
            # computed_list.append(cloned_v)
    else:
        if isinstance(outputs, Variable):
            cloned_v = clone_v_get_shared_updates(outputs, copy_inputs_over)
            cloned_outputs = cloned_v
            # computed_list.append(cloned_v)
        elif isinstance(outputs, Out):
            cloned_v = clone_v_get_shared_updates(outputs.variable,
                                                  copy_inputs_over)
            cloned_outputs = Out(cloned_v, borrow=outputs.borrow)
            # computed_list.append(cloned_v)
        elif outputs is None:
            cloned_outputs = []  # TODO: get Function.__call__ to return None
        else:
            raise TypeError('output must be a theano Variable or Out '
                            'instance (or list of them)',
                            outputs)

    # Iterate over update_expr, cloning its elements, and updating
    # shared_inputs, update_d and update_expr from the SharedVariables
    # we discover.
    # If the variable to be updated is a shared variable not already
    # in shared_inputs, add it.
    # Note: we extend update_expr while iterating over it.

    i = 0
    while i < len(update_expr):
        v, v_update = update_expr[i]
        cloned_v_update = clone_v_get_shared_updates(v_update,
                                                     copy_inputs_over)
        update_d[v] = cloned_v_update
        if isinstance(v, SharedVariable) and v not in shared_inputs:
            shared_inputs.append(v)
        i += 1

    return (input_variables, cloned_outputs,
            [clone_d, update_d, update_expr, shared_inputs])
from keras.optimizers import SGD from keras.utils import np_utils img_rows = 512 img_cols = 512 nb_epoch = 1000 iteration_size = 100000 mini_batch_size = 12 #输入时,一次输入一批图像的大小 delta = 16 initial_discount = 0.01 discount_step = 0.1 num_samples_per_epoch = 50000 #训练集的数目 num_validation_samples = 5000 #验证集的数目 d = shared(initial_discount, name = 'd')#共享变量,就是各线程,公共拥有的变量,通过get_value()、set_value()可以查看、设置共享变量的数值。 http://blog.csdn.net/hjimce/article/details/46806923 def fcrn_loss(y_true, y_pred): loss = K.square(y_pred - y_true) #keras backend square(逐元素平方) 7*16*16 images = [] #如果某一个栅格 predictor 中没有 ground-truth 文本,这个 loss会忽略掉除了 c(text/non-text) 以外的所有 params。 for i in range(0, mini_batch_size): #计算一批图像的均值 c_true = y_true[i, 6, :,:].reshape((1, delta, delta)) #confidence, 1*16*16 # The last feature map in the true vals is the 'c' matrix #T.set_subtensor():将d.get_value()==initial_discount的值赋给c_true中<=0.0 c_discounted = T.set_subtensor(c_true[(c_true<=0.0).nonzero()], d.get_value()) #(c_true<=0.0).nonzero():取c_true<=0.0的下标,c_true[(c_true<=0.0).nonzero()]:取c_true<=0.0的值 final_c = (c_discounted * loss[i,6,:,:])