def test_vae():
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]
    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))
    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))
    enc = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu, rho = enc[:, 0], enc[:, 1]
    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sd=1, shape=minibatch_size)
        dec = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=dec, sd=0.1, observed=x_inp)

        pm.fit(1,
               local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini},
               more_obj_params=[ae, be, ad, bd])
def __init__(self, rng, _input, n_in, n_out, w=None, b=None,
             activation=T.nnet.sigmoid):
    self.input = _input
    if w is None:
        w_values = numpy.asarray(
            rng.uniform(
                low=-numpy.sqrt(6. / (n_in + n_out)),
                high=numpy.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
            ),
            dtype=theano.config.floatX
        )
        if activation == theano.tensor.nnet.sigmoid:
            w_values *= 4
        w = theano.shared(value=w_values, name='w', borrow=True)

    if b is None:
        b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name='b', borrow=True)

    self.w = w
    self.b = b

    lin_output = T.dot(_input, self.w) + self.b
    self.output = (
        lin_output if activation is None
        else activation(lin_output)
    )
    self.params = [self.w, self.b]
def aevb_model():
    with pm.Model() as model:
        pm.HalfNormal('x', shape=(2,), total_size=5)
        pm.Normal('y', shape=(2,))
    x = model.x
    y = model.y
    mu = theano.shared(x.init_value)
    rho = theano.shared(np.zeros_like(x.init_value))
    return {
        'model': model,
        'y': y,
        'x': x,
        'replace': dict(mu=mu, rho=rho)
    }
def aevb_model():
    with pm.Model() as model:
        x = pm.Normal('x')
        pm.Normal('y', x)
    x = model.x
    y = model.y
    mu = theano.shared(x.init_value) * 2
    rho = theano.shared(np.zeros_like(x.init_value))
    return {
        'model': model,
        'y': y,
        'x': x,
        'replace': (mu, rho)
    }
def test_aevb_empirical():
    _, model, _ = models.exponential_beta(n=2)
    x = model.x
    mu = theano.shared(x.init_value)
    rho = theano.shared(np.zeros_like(x.init_value))
    with model:
        inference = ADVI(local_rv={x: (mu, rho)})
        approx = inference.approx
        trace0 = approx.sample(10000)
        approx = Empirical(trace0, local_rv={x: (mu, rho)})
        trace1 = approx.sample(10000)
    np.testing.assert_allclose(trace0['y'].mean(0), trace1['y'].mean(0), atol=0.02)
    np.testing.assert_allclose(trace0['y'].var(0), trace1['y'].var(0), atol=0.02)
    np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.02)
    np.testing.assert_allclose(trace0['x'].var(0), trace1['x'].var(0), atol=0.02)
def test_cloning_available(self):
    gop = generator(integers())
    res = gop ** 2
    shared = theano.shared(np.float32(10))
    res1 = theano.clone(res, {gop: shared})
    f = theano.function([], res1)
    assert f() == np.float32(100)
def test_optimizer_minibatch_with_callback(self):
    n = 1000
    sd0 = 2.
    mu0 = 4.
    sd = 3.
    mu = -5.

    data = sd * np.random.randn(n) + mu
    d = n / sd ** 2 + 1 / sd0 ** 2
    mu_post = (n * np.mean(data) / sd ** 2 + mu0 / sd0 ** 2) / d

    def create_minibatch(data):
        while True:
            data = np.roll(data, 100, axis=0)
            yield data[:100]

    minibatches = create_minibatch(data)
    with Model():
        data_t = theano.shared(next(minibatches))

        def cb(*_):
            data_t.set_value(next(minibatches))

        mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
        Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n)
        inf = self.inference()
        approx = inf.fit(self.NITER * 3, callbacks=[cb],
                         obj_n_mc=10, obj_optimizer=self.optimizer)
        trace = approx.sample_vp(10000)
    np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
    np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
def join_nonshared_inputs(xs, vars, shared, make_shared=False):
    """
    Takes a list of theano Variables and joins their non-shared inputs into a single input.

    Parameters
    ----------
    xs : list of theano tensors
    vars : list of variables to join
    shared : dict of additional variable -> shared variable replacements
    make_shared : bool, if True make the joined input a shared variable
        initialised from the test values instead of a plain tensor

    Returns
    -------
    tensors, inarray
    tensors : list of same tensors but with inarray as input
    inarray : vector of inputs
    """
    joined = theano.tensor.concatenate([var.ravel() for var in vars])

    if not make_shared:
        tensor_type = joined.type
        inarray = tensor_type('inarray')
    else:
        inarray = theano.shared(joined.tag.test_value, 'inarray')

    ordering = ArrayOrdering(vars)
    inarray.tag.test_value = joined.tag.test_value

    get_var = {var.name: var for var in vars}
    replace = {
        get_var[var]: reshape_t(inarray[slc], shp).astype(dtyp)
        for var, slc, shp, dtyp in ordering.vmap
    }

    replace.update(shared)

    xs_special = [theano.clone(x, replace, strict=False) for x in xs]
    return xs_special, inarray
def _test_aevb(self):
    # add to inference that supports aevb
    with pm.Model() as model:
        x = pm.Normal('x')
        pm.Normal('y', x)
    x = model.x
    y = model.y
    mu = theano.shared(x.init_value) * 2
    rho = theano.shared(np.zeros_like(x.init_value))
    with model:
        inference = self.inference(local_rv={x: (mu, rho)})
        approx = inference.fit(3, obj_n_mc=2, obj_optimizer=self.optimizer)
        approx.sample_vp(10)
        approx.apply_replacements(
            y,
            more_replacements={x: np.asarray([1, 1], dtype=x.dtype)}
        ).eval()
def test_aevb_histogram(self):
    _, model, _ = models.exponential_beta(n=2)
    x = model.x
    mu = theano.shared(x.init_value)
    rho = theano.shared(np.zeros_like(x.init_value))
    with model:
        inference = ADVI(local_rv={x: (mu, rho)})
        approx = inference.approx
        trace0 = approx.sample_vp(10000)
        histogram = Histogram(trace0, local_rv={x: (mu, rho)})
        trace1 = histogram.sample_vp(10000)
        histogram.random(no_rand=True)
        histogram.random_fn(no_rand=True)
    np.testing.assert_allclose(trace0['y'].mean(0), trace1['y'].mean(0), atol=0.02)
    np.testing.assert_allclose(trace0['y'].var(0), trace1['y'].var(0), atol=0.02)
    np.testing.assert_allclose(trace0['x'].mean(0), trace1['x'].mean(0), atol=0.02)
    np.testing.assert_allclose(trace0['x'].var(0), trace1['x'].var(0), atol=0.02)
def test_observed_type(self):
    X_ = np.random.randn(100, 5)
    X = pm.floatX(theano.shared(X_))
    with pm.Model():
        x1 = pm.Normal('x1', observed=X_)
        x2 = pm.Normal('x2', observed=X)

    assert x1.type == X.type
    assert x2.type == X.type
def test_gen_cloning_with_shape_change(self):
    data = floatX(np.random.uniform(size=(1000, 10)))
    minibatches = DataSampler(data, batchsize=50)
    gen = generator(minibatches)
    gen_r = tt_rng().normal(size=gen.shape).T
    X = gen.dot(gen_r)
    res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
    assert res.eval().shape == (50,)
    shared = theano.shared(data)
    res2 = theano.clone(res, {gen: shared ** 2})
    assert res2.eval().shape == (1000,)
def __init__(self, cost, grad_vars, extra_vars=None, dtype=None,
             casting='no', **kwargs):
    if extra_vars is None:
        extra_vars = []

    names = [arg.name for arg in grad_vars + extra_vars]
    if any(name is None for name in names):
        raise ValueError('Arguments must be named.')
    if len(set(names)) != len(names):
        raise ValueError('Names of the arguments are not unique.')

    if cost.ndim > 0:
        raise ValueError('Cost must be a scalar.')

    self._grad_vars = grad_vars
    self._extra_vars = extra_vars
    self._extra_var_names = set(var.name for var in extra_vars)
    self._cost = cost
    self._ordering = ArrayOrdering(grad_vars)
    self.size = self._ordering.size
    self._extra_are_set = False
    if dtype is None:
        dtype = theano.config.floatX
    self.dtype = dtype
    for var in self._grad_vars:
        if not np.can_cast(var.dtype, self.dtype, casting):
            raise TypeError('Invalid dtype for variable %s. Can not '
                            'cast to %s with casting rule %s.'
                            % (var.name, self.dtype, casting))
        if not np.issubdtype(var.dtype, float):
            raise TypeError('Invalid dtype for variable %s. Must be '
                            'floating point but is %s.'
                            % (var.name, var.dtype))

    givens = []
    self._extra_vars_shared = {}
    for var in extra_vars:
        shared = theano.shared(var.tag.test_value, var.name + '_shared__')
        self._extra_vars_shared[var.name] = shared
        givens.append((var, shared))

    self._vars_joined, self._cost_joined = self._build_joined(
        self._cost, grad_vars, self._ordering.vmap)

    grad = tt.grad(self._cost_joined, self._vars_joined)
    grad.name = '__grad'

    inputs = [self._vars_joined]

    self._theano_function = theano.function(
        inputs, [self._cost_joined, grad], givens=givens, **kwargs)
def __init__(self, _input, n_in, n_out, activation=T.nnet.sigmoid):
    self.input = _input
    self.w = theano.shared(
        value=numpy.zeros(
            (n_in, n_out),
            dtype=theano.config.floatX
        ),
        name='w',
        borrow=True
    )
    self.b = theano.shared(
        value=numpy.zeros(
            (n_out,),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )
    self.prediction = activation(T.dot(_input, self.w) + self.b)
    self.params = [self.w, self.b]
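# A minimal usage sketch for the constructor above. The enclosing class is not
# shown in this snippet, so `SigmoidLayer` is a purely hypothetical name;
# `numpy`, `theano` and `theano.tensor as T` are assumed to be imported as in
# the snippet itself.
x_sym = T.matrix('x_sym')
layer = SigmoidLayer(_input=x_sym, n_in=28 * 28, n_out=10)
# Compile a function that maps a minibatch of flattened images to predictions.
predict = theano.function([x_sym], layer.prediction)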
def test_gradient_with_scaling(self):
    with pm.Model() as model1:
        genvar = generator(gen1())
        m = Normal('m')
        Normal('n', observed=genvar, total_size=1000)
        grad1 = theano.function([m], tt.grad(model1.logpt, m))
    with pm.Model() as model2:
        m = Normal('m')
        shavar = theano.shared(np.ones((1000, 100)))
        Normal('n', observed=shavar)
        grad2 = theano.function([m], tt.grad(model2.logpt, m))

    for i in range(10):
        shavar.set_value(np.ones((100, 100)) * i)
        g1 = grad1(1)
        g2 = grad2(1)
        np.testing.assert_almost_equal(g1, g2)
def make_shared_replacements(vars, model):
    """
    Makes shared replacements for all *other* variables than the ones passed.

    This way functions can be called many times without setting unchanging
    variables. Allows us to use func.trust_input by removing the need for
    DictToArrayBijection and kwargs.

    Parameters
    ----------
    vars : list of variables not to make shared
    model : model

    Returns
    -------
    Dict of variable -> new shared variable
    """
    othervars = set(model.vars) - set(vars)
    return {var: theano.shared(var.tag.test_value, var.name + '_shared')
            for var in othervars}
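# A minimal usage sketch for make_shared_replacements above, assuming
# `import pymc3 as pm` and that the function is available in scope; the model
# and variable names are illustrative. Every free variable except `a` gets a
# shared stand-in built from its test value, so a compiled function only needs
# `a` as an explicit input.
with pm.Model() as toy_model:
    a = pm.Normal('a')
    b = pm.Normal('b')
replacements = make_shared_replacements([toy_model.a], toy_model)
assert list(replacements) == [toy_model.b]  # only `b` is replaced by a shared variable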
class _approx:
    params = (theano.shared(np.asarray([1, 2, 3])), )
def shared_zeros(shape, dtype=theano.config.floatX, name='', n=None):
    shape = shape if n is None else (n, ) + shape
    return theano.shared(numpy.zeros(shape, dtype=dtype), name=name)
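# A minimal usage sketch for shared_zeros above (assumes `import numpy` and
# `import theano`, as in the surrounding snippets): a (500,)-shaped zero bias,
# and a stack of 3 such biases via the optional `n` argument.
b = shared_zeros((500,), name='b')
b_stack = shared_zeros((500,), name='b_stack', n=3)
assert b.get_value().shape == (500,)
assert b_stack.get_value().shape == (3, 500)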
def aevb_initial():
    return theano.shared(np.random.rand(3, 7).astype('float32'))
from __future__ import print_function
import numpy
import theano
import theano.tensor as T
import pickle, gzip
import timeit

data_dir = "/sharedfiles/"

print("Using device", theano.config.device)

print("Loading data")
with gzip.open(data_dir + "mnist.pkl.gz", 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f)

train_set_x = theano.shared(
    numpy.asarray(train_set[0], dtype=theano.config.floatX))
train_set_y = theano.shared(numpy.asarray(train_set[1], dtype='int32'))

print("Building model")

batch_size = 600
n_in = 28 * 28
n_hidden = 500
n_out = 10

x = T.matrix('x')
y = T.ivector('y')

def shared_zeros(shape, dtype=theano.config.floatX, name='', n=None):
    shape = shape if n is None else (n, ) + shape
    return theano.shared(numpy.zeros(shape, dtype=dtype), name=name)
def __init__(self, input, filter_shape, image_shape, poolsize=(2, 2)):
    """
    Allocate a NetConvPoolLayer with shared variable internal parameters.

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = np.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
               np.prod(poolsize))
    # initialize weights with random weights
    W_bound = np.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        np.asarray(
            np.random.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX
        ),
        name='W'
    )

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    self.conv_out = conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        image_shape=image_shape
    )

    # downsample each feature map individually, using maxpooling
    self.pooled_out = downsample.max_pool_2d(
        input=self.conv_out,
        ds=poolsize,
        ignore_border=True
    )

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    self.output = T.tanh(self.pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input
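# A hedged usage sketch for NetConvPoolLayer above, assuming the imports the
# snippet relies on (numpy as np, theano, theano.tensor as T, conv2d and
# downsample): 20 5x5 filters over 1-channel 28x28 images, minibatch size 500.
x4 = T.tensor4('x4')
layer0 = NetConvPoolLayer(
    input=x4,
    filter_shape=(20, 1, 5, 5),
    image_shape=(500, 1, 28, 28),
    poolsize=(2, 2),
)
# After the 5x5 convolution (28 - 5 + 1 = 24) and 2x2 max-pooling (24 // 2 = 12),
# layer0.output has shape (500, 20, 12, 12).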