def setUp(self):
    super(Test_local_elemwise_alloc, self).setUp()
    self.fast_run_mode = mode_with_gpu

    # self.vec = tensor.vector('vec', dtype=dtype)
    # self.mat = tensor.matrix('mat', dtype=dtype)
    # self.tens = tensor.tensor3('tens', dtype=dtype)

    # self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
    # self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)

    self.alloc_wo_dep = basic_ops.gpu_alloc(self.vec, 2, 2)
    self.alloc_w_dep = basic_ops.gpu_alloc(self.vec, *self.mat.shape)
    self.alloc_w_dep_tens = basic_ops.gpu_alloc(
        self.vec, self.tens.shape[0], self.tens.shape[1]
    )
    self.tv_wo_dep = basic_ops.gpu_alloc(self.vec, 5, 5)
    self.tm_wo_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
    self.s = tensor.iscalar('s')
    self.tv_w_dep = basic_ops.gpu_alloc(self.vec, self.s, self.s)
    self.tm_w_dep = basic_ops.gpu_alloc(self.mat, 5, 5, 5)
    self.row = tensor.row(dtype=self.dtype)
    self.o = basic_ops.gpu_alloc(self.row, 5, 5)
def __init__(self, data, targets, log=False):
    # training, test = data[sp:], data[:sp]
    n_inputs = data.shape[1]
    n_targets = int(max(targets) + 1)
    # print(n_inputs, int(n_targets))
    train_scale = abs(data).max(0)
    train_shift = data.mean(0) / train_scale
    normalized = (data / train_scale - train_shift)[:, None]
    train_scaled, test_scaled = Prediction.split(normalized)
    train_targets, test_targets = Prediction.split(targets)

    input_var = T.row('X', dtype='float64')
    target_var = T.vector('y', dtype='int64')
    network = lasagne.layers.InputLayer((1, n_inputs), input_var)
    network = lasagne.layers.DenseLayer(
        network, 100,
        W=lasagne.init.GlorotUniform(),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.DenseLayer(
        network, n_targets,
        nonlinearity=lasagne.nonlinearities.softmax)

    # create loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)

    # create parameter update expressions
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=0.01,
                                                momentum=0.9)
    acc = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                 dtype=theano.config.floatX)

    # compile training function that updates parameters and returns training loss
    train_fn = theano.function([input_var, target_var], [loss, acc],
                               updates=updates)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))

    self.predict_fn = predict_fn
    self.train_fn = train_fn
    self.train_scaled = train_scaled
    self.test_scaled = test_scaled
    self.train_targets = train_targets
    self.test_targets = test_targets
    self.log = log
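Because `input_var` is a `T.row`, the compiled `train_fn` consumes one sample at a time as a `(1, n_inputs)` array together with a length-1 integer target. A hypothetical driver loop (the `train_one_epoch` helper is mine, not part of the original class) might look like this:

import numpy as np

def train_one_epoch(model):
    """Feed the training samples one row at a time and return the mean loss."""
    losses = []
    for x, y in zip(model.train_scaled, model.train_targets):
        # x already has shape (1, n_inputs) thanks to the [:, None] above
        loss, acc = model.train_fn(x.astype('float64'),
                                   np.array([y], dtype='int64'))
        losses.append(float(loss))
    return float(np.mean(losses))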
def test_broadcast_mismatch(self):
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    # print x.broadcastable
    y = tensor.row('y', self.dtype)
    # print y.broadcastable
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
def test_broadcast_mismatch(self):
    rng = np.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    # print x.broadcastable
    y = tensor.row("y", self.dtype)
    # print y.broadcastable
    cond = theano.tensor.iscalar("cond")
    with pytest.raises(TypeError):
        ifelse(cond, x, y)
    with pytest.raises(TypeError):
        ifelse(cond, y, x)
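Both versions of the test check the same contract: `ifelse` requires its two branches to have identical types, and the 1-d shared vector does not match the 2-d row. A minimal standalone illustration (assuming a plain Theano install; the variable names here are mine):

import theano.tensor as T
from theano.ifelse import ifelse

cond = T.iscalar('cond')
x = T.vector('x')          # broadcastable (False,)
y = T.row('y')             # broadcastable (True, False)
print(x.type == y.type)    # False, so building the ifelse fails
try:
    ifelse(cond, x, y)
except TypeError as e:
    print('TypeError:', e)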
def make_theano_batch(self, name=None, dtype=None, batch_size=None):
    if dtype is None:
        dtype = config.floatX

    if self.sparse:
        if batch_size is not None:
            raise NotImplementedError("batch_size not implemented "
                                      "for sparse case")
        return theano.sparse.csr_matrix(name=name)
    else:
        if batch_size == 1:
            return T.row(name=name, dtype=dtype)
        else:
            return T.matrix(name=name, dtype=dtype)
def make_theano_batch(self, name=None, dtype=None, batch_size=None):
    if dtype is None:
        dtype = config.floatX

    if self.sparse:
        if batch_size is not None:
            raise NotImplementedError("batch_size not implemented "
                                      "for sparse case")
        rval = theano.sparse.csr_matrix(name=name)
    else:
        if batch_size == 1:
            rval = T.row(name=name, dtype=dtype)
        else:
            rval = T.matrix(name=name, dtype=dtype)

    if config.compute_test_value != 'off':
        if batch_size == 1:
            n = 1
        else:
            # TODO: try to extract constant scalar value from batch_size
            n = 4
        rval.tag.test_value = self.get_origin_batch(n=n)

    return rval
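This variant differs only in tagging a test value when Theano's test-value machinery is enabled. A minimal sketch of that mechanism in isolation (the shapes and values here are illustrative, not from the original project):

import numpy as np
import theano
import theano.tensor as T

theano.config.compute_test_value = 'warn'   # evaluate graphs eagerly on test values

x = T.row('x')                              # the row type fixes the first dim to 1
x.tag.test_value = np.zeros((1, 4), dtype=theano.config.floatX)
y = x * 2
print(y.tag.test_value.shape)               # (1, 4), computed from x's test value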
# [False, True] column (Mx1 matrix)
# [False, True, False] A Mx1xP tensor (a)
# [True, False, False] A 1xNxP tensor (b)
# [False, False, False] A MxNxP tensor (pattern of a + b)
x = T.TensorType(dtype='int32', broadcastable=())('myvar')

# config dependent float type (config.floatX is float64 by default on x86_64)
x = T.scalar(name='x', dtype=T.config.floatX)
report(x)

# 1-dimensional vector (ndarray).
v = T.vector(dtype=T.config.floatX, name='v')
report(v)

# 2-dimensional ndarray in which the number of rows is guaranteed to be 1.
v = T.row(name=None, dtype=T.config.floatX)
report(v)

# 2-dimensional ndarray in which the number of columns is guaranteed to be 1.
v = T.col(name=None, dtype=T.config.floatX)
report(v)

# 2-dimensional ndarray
v = T.matrix(name=None, dtype=T.config.floatX)
report(v)

# 3-dimensional ndarray
v = T.tensor3(name=None, dtype=T.config.floatX)
report(v)

# 4-dimensional ndarray
v = T.tensor4(name=None, dtype=T.config.floatX)
report(v)
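The comment block at the top describes how broadcastable patterns combine under elementwise operations; a small sanity check of that claim (the `a`, `b`, `c` names are mine):

import theano.tensor as T

a = T.TensorType('float32', (False, True, False))('a')   # M x 1 x P
b = T.TensorType('float32', (True, False, False))('b')   # 1 x N x P
c = a + b
print(c.broadcastable)   # (False, False, False), i.e. an M x N x P result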
# dot product/matrix product
theano_dot = theano.function([theano_matrix1, theano_matrix2],
                             T.dot(theano_matrix1, theano_matrix2),
                             name='theano_dot')

theano_scalar = T.fscalar(name='theano_scalar')
theano_scale = theano.function([theano_matrix1, theano_scalar],
                               theano_matrix1 * theano_scalar,
                               name='scale')

# elementwise product
theano_multiply = theano.function([theano_matrix1, theano_matrix2],
                                  theano_matrix1 * theano_matrix2,
                                  name='theano_multiply')

theano_row_vector = T.row(name='theano_row_vector')
theano_col_vector = T.col(name='theano_col_vector')

theano_subtract_row = theano.function([theano_matrix1, theano_row_vector],
                                      theano_matrix1 - theano_row_vector,
                                      name='theano_subtract_row')
theano_divide_row = theano.function([theano_matrix1, theano_row_vector],
                                    theano_matrix1 / theano_row_vector,
                                    name='theano_divide_row')
theano_subtract_col = theano.function([theano_matrix1, theano_col_vector],
                                      theano_matrix1 - theano_col_vector,
                                      name='theano_subtract_col')
theano_divide_col = theano.function([theano_matrix1, theano_col_vector],
                                    theano_matrix1 / theano_col_vector,
                                    name='theano_divide_col')
import theano
import theano.tensor as T
import numpy as np

r = T.row()
print r.broadcastable
# (True, False)

mtr = T.matrix()
print mtr.broadcastable
# (False, False)

f_row = theano.function([r, mtr], [r + mtr])

R = np.arange(3).reshape(1, 3)
print R
# array([[0, 1, 2]])

M = np.arange(9).reshape(3, 3)
print M
# array([[0, 1, 2],
#        [3, 4, 5],
#        [6, 7, 8]])

print f_row(R, M)
# [array([[  0.,   2.,   4.],
#         [  3.,   5.,   7.],
#         [  6.,   8.,  10.]])]

c = T.col()
print c.broadcastable
# (False, True)
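The snippet stops right after constructing the column variable; a minimal continuation mirroring the row case (the `f_col` and `C` names are mine) shows the column broadcasting down the matrix's columns instead:

f_col = theano.function([c, mtr], [c + mtr])

C = np.arange(3).reshape(3, 1)
print(f_col(C, M))
# [array([[  0.,   1.,   2.],
#         [  4.,   5.,   6.],
#         [  8.,   9.,  10.]])]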
def __init__(self, inputs, n_in, n_out=100, activation=TT.nnet.sigmoid,
             backward_activation=TT.nnet.softmax, W=None, b=None,
             b_hidden=None, b_visible=None, persistent=None, CD_k=1,
             CD_use_mean=True, sparsity_target=None,
             output_sparsity_target=None,
             numpy_rng=numpy.random.RandomState(),
             L1_norm=0.0, L2_norm=0.0, bias_decay=0.0, entropy_loss=0.0,
             centering=False, prefer_extremes=False, theano_rng=None):
    """
    Initialize the parameters of the Replicated Softmax. This merely sets
    the correct values for an RBM in the defaults. (The only other
    difference than using the specific pair of forward/backward activations
    is the computation of free energy.)

    .. note::

        In order for this model to implement the real Replicated Softmax
        Model of Salakhutdinov and Hinton, the ``activation`` and
        ``backward_activation`` parameters have to remain in their default
        form.

    :type inputs: theano.tensor.var.TensorVariable
    :param inputs: Symbolic variable that describes the input of the
        architecture (e.g., one minibatch of input images, or output of a
        previous layer).

    :type n_in: int
    :param n_in: Number of input units, the dimension of the space in which
        the data points live.

    :type n_out: int
    :param n_out: The number of hidden units.

    :type activation: theano.tensor.elemwise.Elemwise
    :param activation: The nonlinearity applied at neuron output.

    :type backward_activation: theano.tensor.elemwise.Elemwise
    :param backward_activation: The nonlinearity applied at hidden neuron
        output. If not given, same as ``activation``. (Some RBMs, like the
        Replicated Softmax model, use a different forward and backward
        activation function.)

    :type W: theano.tensor.sharedvar.TensorSharedVariable
    :param W: Theano variable pointing to a set of weights that should be
        shared between the autoencoder and another architecture; if the
        autoencoder should be standalone, leave this as None. This set of
        weights refers to the transition from visible to hidden layer.

    :type b: theano.tensor.sharedvar.TensorSharedVariable
    :param b: Theano variable pointing to a set of bias values that should
        be shared between the autoencoder and another architecture; if the
        autoencoder should be standalone, leave this as None. This set of
        bias values refers to the transition from visible to hidden layer.

        .. note::

            The ``b`` name is used in the RBM for compatibility of the class
            interface. Internally, the name ``b_hidden`` is used to improve
            clarity of the sometimes more complicated math expressions, and
            for ontological symmetry with ``b_visible``.

    :type b_hidden: theano.tensor.sharedvar.TensorSharedVariable
    :param b_hidden: Alias for ``b``, used internally as the attribute name
        to make the purpose clear.

        .. warning::

            Do not use both ``b`` and ``b_hidden`` at the same time! The
            intended interface is ``b``, which is also used in the ``link()``
            class method to construct the RBM.

    :type b_visible: theano.tensor.sharedvar.TensorSharedVariable
    :param b_visible: Theano variable pointing to a set of bias values that
        should be shared between the autoencoder and another architecture;
        if the autoencoder should be standalone, leave this as None. This
        set of bias values refers to the transition from hidden to visible
        layer.

    :type persistent: theano.tensor.sharedvar.TensorSharedVariable
    :param persistent: If you wish to train using Persistent Contrastive
        Divergence, supply an initial state of the Markov chain. If set to
        None (default), use Contrastive Divergence for training (initialize
        the chain to the current data point).
    :type CD_k: int
    :param CD_k: How many Gibbs sampling steps should Contrastive Divergence
        take in generating the negative particle.

    :type CD_use_mean: Boolean
    :param CD_use_mean: Should the (P)CD Gibbs chain end use the mean
        activation of the visible units as the chain end? If ``False``, uses
        the visible sample. If ``True``, uses the visible mean. Default is
        ``True``.
    """
    super(ReplicatedSoftmax, self).__init__(
        inputs, n_in, n_out, TT.nnet.sigmoid, TT.nnet.softmax,
        W, b, b_hidden, b_visible, persistent, CD_k, CD_use_mean,
        sparsity_target, output_sparsity_target, numpy_rng,
        L1_norm, L2_norm, bias_decay, entropy_loss, centering,
        prefer_extremes, theano_rng)

    print 'B: ', self.b_hidden.broadcastable

    self.b_hidden_broadcastable = TT.row('b_hidden_broadcastable',
                                         dtype=theano.config.floatX)
    print 'B/dsh: ', self.b_hidden.broadcastable
    print 'Hidden type:', type(self.b_hidden)

    # TT.addbroadcast returns a new variable; the result has to be kept,
    # otherwise the call has no effect.
    self.b_hidden_broadcastable = TT.addbroadcast(
        self.b_hidden_broadcastable, 0)
    self.b_hidden_broadcastable.tag.test_value = numpy.ones((2, self.n_out))

    print 'B/dsh: ', self.b_hidden.broadcastable
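The debugging prints above revolve around the broadcastable pattern of the hidden bias. As a general illustration of the API involved (my own variable names, not from the original project), `addbroadcast` produces a new variable with the requested dimension marked broadcastable rather than modifying its argument in place:

import theano.tensor as TT

m = TT.matrix('m')
print(m.broadcastable)              # (False, False)
m_row_like = TT.addbroadcast(m, 0)  # new variable; m itself is unchanged
print(m.broadcastable)              # still (False, False)
print(m_row_like.broadcastable)     # (True, False), i.e. row-like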
# define some functions
# dot product/matrix product
theano_dot = theano.function([theano_matrix1, theano_matrix2],
                             T.dot(theano_matrix1, theano_matrix2),
                             name='theano_dot')

theano_scalar = T.fscalar(name='theano_scalar')
theano_scale = theano.function([theano_matrix1, theano_scalar],
                               theano_matrix1 * theano_scalar,
                               name='scale')

# elementwise product
theano_multiply = theano.function([theano_matrix1, theano_matrix2],
                                  theano_matrix1 * theano_matrix2,
                                  name='theano_multiply')

theano_row_vector = T.row(name='theano_row_vector')
theano_col_vector = T.col(name='theano_col_vector')

theano_subtract_row = theano.function([theano_matrix1, theano_row_vector],
                                      theano_matrix1 - theano_row_vector,
                                      name='theano_subtract_row')
theano_divide_row = theano.function([theano_matrix1, theano_row_vector],
                                    theano_matrix1 / theano_row_vector,
                                    name='theano_divide_row')
theano_subtract_col = theano.function([theano_matrix1, theano_col_vector],
                                      theano_matrix1 - theano_col_vector,
                                      name='theano_subtract_col')
theano_divide_col = theano.function([theano_matrix1, theano_col_vector],
                                    theano_matrix1 / theano_col_vector,
                                    name='theano_divide_col')

theano_var1 = theano.function([theano_matrix1],
                              T.var(theano_matrix1, 1),
                              name='theano_var1')
theano_mean0 = theano.function([theano_matrix1],
                               T.mean(theano_matrix1, 0),
                               name='theano_mean0')
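`theano_matrix1` and `theano_matrix2` are assumed to be declared elsewhere in that script, so here is a self-contained sketch of the row broadcasting these helpers rely on (the variable names are mine): subtracting a (1, N) row from an (M, N) matrix applies the row to every row of the matrix.

import numpy as np
import theano
import theano.tensor as T

m = T.matrix('m')
r = T.row('r')                                   # broadcastable (True, False)
subtract_row = theano.function([m, r], m - r)

M = np.arange(6.).reshape(2, 3).astype(theano.config.floatX)
R = M.mean(axis=0, keepdims=True)                # shape (1, 3)
print(subtract_row(M, R))                        # each column centred on its mean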
x = T.scalar()
print(x.type)   # TensorType(float64, scalar) by default

x = T.scalar(name='var', dtype='float32')
print(x.type)
print(x.get_parents())

x = T.vector(name='var', dtype='float32')
print(x.type)

x = T.row(name='var', dtype='float32')
print(x.type)

x = T.fmatrix()
x.type

# Custom TensorType
dtensor5 = T.TensorType('float64', (False,)*5)
dtensor5.dtype
# It's a bit non-standard; should explore more
dtensor5.value_zeros((1, 4))   # Why does it accept (1, 4)? Should explore more
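On that last question: as far as I can tell, `value_zeros` simply allocates a zero ndarray of whatever shape it is given, using the type's dtype, without checking the shape against the five dimensions declared for the type. A quick check of that assumption:

z = dtensor5.value_zeros((1, 4))
print(z.shape, z.dtype)   # (1, 4) float64 -- no ndim/broadcastable validation
print(type(z))            # <class 'numpy.ndarray'>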