class SimpleBernoulliSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing samples from bernoulli distributions.

    Parameters
    ----------
    mean : :class:`Layer` instance
        Parameterizing the mean value of each bernoulli distribution
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """

    def __init__(self, mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleBernoulliSampleLayer, self).__init__(mean, **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, deterministic=False, **kwargs):
        if deterministic:
            z = T.switch(mu >= 0.5, T.ones_like(mu), T.zeros_like(mu))
        else:
            z = self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
        return z
class SimpleBernoulliSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing samples from bernoulli distributions.

    Parameters
    ----------
    mean : :class:`Layer` instance
        Parameterizing the mean value of each bernoulli distribution
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """

    def __init__(self, mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleBernoulliSampleLayer, self).__init__(mean, **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, **kwargs):
        return self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
class SimpleSampleLayer(L.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.
    """

    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        if deterministic:
            return mu
        eps = self._srng.normal(mu.shape)
        return mu + T.exp(0.5 * log_var) * eps
class BernoulliSampleLayer(lasagne.layers.Layer):
    """
    Bernoulli Sampling layer supporting importance sampling

    Parameters
    ----------
    mean : class:`Layer` instance
        Parameterizing the mean value of each bernoulli distribution
    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
    iw_samples : int or T.scalar
        Number of importance samples in the sum over k
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """

    def __init__(self, mean,
                 eq_samples=1,
                 iw_samples=1,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(BernoulliSampleLayer, self).__init__(mean, **kwargs)
        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        batch_size, num_latent = input_shape
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu = input
        batch_size, num_latent = mu.shape
        shp = (batch_size, self.eq_samples, self.iw_samples, num_latent)
        mu_shp = mu.dimshuffle(0, 'x', 'x', 1)
        mu_shp = T.repeat(mu_shp, axis=1, repeats=self.eq_samples)
        mu_shp = T.repeat(mu_shp, axis=2, repeats=self.iw_samples)
        samples = self._srng.binomial(size=shp, p=mu_shp,
                                      dtype=theano.config.floatX)
        return samples.reshape((-1, num_latent))
def test_seed_fn():
    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)

    idx = tensor.ivector()
    for use_cuda in test_use_cuda:
        if config.mode == 'FAST_COMPILE' and use_cuda:
            mode = 'FAST_RUN'
        else:
            mode = config.mode

        for new_seed, same in [(234, True), (None, True), (23, False)]:
            random = MRG_RandomStreams(234, use_cuda=use_cuda)
            fn1 = theano.function([], random.uniform((2, 2), dtype='float32'),
                                  mode=mode)
            fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                     dtype='float32'),
                                  mode=mode)
            fn3 = theano.function([idx], random.uniform(idx, nstreams=3,
                                                        ndim=1,
                                                        dtype='float32'),
                                  mode=mode)

            fn1_val0 = fn1()
            fn1_val1 = fn1()
            assert not numpy.allclose(fn1_val0, fn1_val1)
            fn2_val0 = fn2()
            fn2_val1 = fn2()
            assert not numpy.allclose(fn2_val0, fn2_val1)
            fn3_val0 = fn3([4])
            fn3_val1 = fn3([4])
            assert not numpy.allclose(fn3_val0, fn3_val1)
            assert fn1_val0.size == 4
            assert fn2_val0.size == 9

            random.seed(new_seed)

            fn1_val2 = fn1()
            fn1_val3 = fn1()
            fn2_val2 = fn2()
            fn2_val3 = fn2()
            fn3_val2 = fn3([4])
            fn3_val3 = fn3([4])
            assert numpy.allclose(fn1_val0, fn1_val2) == same
            assert numpy.allclose(fn1_val1, fn1_val3) == same
            assert numpy.allclose(fn2_val0, fn2_val2) == same
            assert numpy.allclose(fn2_val1, fn2_val3) == same
            assert numpy.allclose(fn3_val0, fn3_val2) == same
            assert numpy.allclose(fn3_val1, fn3_val3) == same
class NaiveCoulombShuffleLayer(lasagne.layers.Layer):
    """
    Assumes the input to be a minibatch of coulomb matrices [BATCH, 1, 29, 29].

    The shuffling is as described in [MONTAVON]_.

    Parameters
    ----------
    coulomb : :class:`Layer` instance
        Parameterizing the coulomb matrix as described in [MONTAVON]_.
    seed : int
        seed to random stream
    axis : int
        the dimension to compute the row norms over

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [MONTAVON] Grégoire Montavon et al 2013 New J. Phys. 15 095003
            "Machine learning of molecular electronic properties in chemical
            compound space"
            http://iopscience.iop.org/article/10.1088/1367-2630/15/9/095003
    """

    def __init__(self, coulomb,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 axis=2,
                 **kwargs):
        # lasagne.layers.Layer expects a single incoming layer, not a list
        super(NaiveCoulombShuffleLayer, self).__init__(coulomb, **kwargs)
        self._srng = RandomStreams(seed)
        self.axis = axis

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        # The shape doesn't change, this layer only shuffles the rows.
        return input_shape

    def get_output_for(self, input, **kwargs):
        coulomb = input
        shape = coulomb.shape
        # coulomb matrices are symmetric so it doesn't matter which axis we
        # find the norm over
        norm = T.sqrt(T.sum(T.sqr(coulomb), axis=self.axis))
        # we want as many random numbers as (batchsize, axis representing
        # coulomb matrix rows)
        eps = self._srng.normal(norm.shape)
        idxs = T.argsort(norm + eps)
        # reorder the rows of each matrix by its noise-perturbed row norms
        # (numpy-style advanced indexing; the trailing column axis is carried
        # along unchanged)
        batch_idx = T.arange(shape[0]).dimshuffle(0, 'x', 'x')
        chan_idx = T.arange(shape[1]).dimshuffle('x', 0, 'x')
        return coulomb[batch_idx, chan_idx, idxs]
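# Minimal NumPy sketch of the row-shuffling idea used by the layer above
# (toy values and names are illustrative, not taken from the original code):
# each row norm is perturbed with Gaussian noise and the rows are reordered
# by the noisy norms, once per matrix in the minibatch.
import numpy as np

rng = np.random.RandomState(0)
C = np.array([[36.9, 5.5, 5.5],
              [5.5, 0.5, 0.3],
              [5.5, 0.3, 0.5]])            # toy symmetric "Coulomb" matrix
norms = np.linalg.norm(C, axis=1)          # one norm per row
noisy_norms = norms + rng.normal(size=norms.shape)
perm = np.argsort(noisy_norms)             # noisy ordering of the rows
C_shuffled = C[perm]                       # row-permuted matrix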
class SimpleConcreteSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing a single Monte Carlo sample from a
    Concrete (Gumbel-softmax) relaxation of a categorical distribution,
    as described in [MADDISON]_.

    Parameters
    ----------
    logits : :class:`Layer` instance
        Parameterizing the unnormalized log-probabilities of the categorical
        distribution to sample from.
    temperature : float
        Temperature of the softmax relaxation; lower values give samples
        closer to one-hot vectors.
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [MADDISON] Maddison, Chris J., Andriy Mnih, and Yee Whye Teh.
            "The Concrete Distribution: A Continuous Relaxation of Discrete
            Random Variables."
            arXiv preprint arXiv:1611.00712 (2016).
    """

    def __init__(self, logits,
                 temperature=1.0,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleConcreteSampleLayer, self).__init__(logits, **kwargs)
        self.temperature = temperature
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, input, deterministic=False, **kwargs):
        logits = input
        if deterministic:
            z = T.nnet.softmax(logits)
        else:
            shape = logits.shape
            U = self._srng.uniform(shape, dtype=theano.config.floatX)
            gumbel_sample = -T.log(-T.log(U + 1e-20) + 1e-20)
            y = logits + gumbel_sample
            z = T.nnet.softmax(y / self.temperature)
        return z
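# Hedged NumPy sketch of the Gumbel-softmax draw performed above (values and
# the helper name are illustrative, not part of the original code): adding
# Gumbel noise to the logits and applying a tempered softmax gives a
# reparameterized, differentiable approximation of a categorical sample.
import numpy as np

def gumbel_softmax_sample(logits, temperature, rng=np.random):
    u = rng.uniform(size=logits.shape)
    gumbel = -np.log(-np.log(u + 1e-20) + 1e-20)
    y = (logits + gumbel) / temperature
    y = y - y.max()                  # numerically stable softmax
    e = np.exp(y)
    return e / e.sum()

logits = np.log(np.array([0.7, 0.2, 0.1]))
print(gumbel_softmax_sample(logits, temperature=1.0))   # soft sample
print(gumbel_softmax_sample(logits, temperature=0.1))   # close to one-hot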
class SimpleSampleLayer(lasagne.layers.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.

    Parameters
    ----------
    mu, log_var : :class:`Layer` instances
        Parameterizing the mean and log(variance) of the distribution to
        sample from as described in [KINGMA]_. The code assumes that these
        have the same number of dimensions.
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [KINGMA] Kingma, Diederik P., and Max Welling.
            "Auto-Encoding Variational Bayes."
            arXiv preprint arXiv:1312.6114 (2013).
    """

    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        if deterministic:
            z = mu
        else:
            eps = self._srng.normal(mu.shape, dtype=theano.config.floatX)
            z = mu + T.exp(0.5 * log_var) * eps
        return z
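# Hedged usage sketch for the SimpleSampleLayer above (layer names and sizes
# are assumptions, not taken from the original code): a VAE-style encoder
# producing mu and log(sigma^2), with one latent sample drawn per datapoint
# during training and the mean used for deterministic evaluation.
import lasagne
import theano.tensor as T

x = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 784), input_var=x)
l_enc = lasagne.layers.DenseLayer(l_in, num_units=128)
l_mu = lasagne.layers.DenseLayer(
    l_enc, num_units=32, nonlinearity=lasagne.nonlinearities.identity)
l_log_var = lasagne.layers.DenseLayer(
    l_enc, num_units=32, nonlinearity=lasagne.nonlinearities.identity)
l_z = SimpleSampleLayer(mean=l_mu, log_var=l_log_var)

# stochastic sample for training, mean for deterministic evaluation
z_train = lasagne.layers.get_output(l_z, deterministic=False)
z_eval = lasagne.layers.get_output(l_z, deterministic=True)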
class BernoulliSampleLayer(lasagne.layers.Layer):

    def __init__(self, mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(BernoulliSampleLayer, self).__init__(mean, **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, **kwargs):
        return self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
class SimpleSampleLayer(lasagne.layers.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.

    Parameters
    ----------
    mu, log_var : :class:`Layer` instances
        Parameterizing the mean and log(variance) of the distribution to
        sample from as described in [KINGMA]_. The code assumes that these
        have the same number of dimensions.
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [KINGMA] Kingma, Diederik P., and Max Welling.
            "Auto-Encoding Variational Bayes."
            arXiv preprint arXiv:1312.6114 (2013).
    """

    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, **kwargs):
        mu, log_var = input
        eps = self._srng.normal(mu.shape)
        z = mu + T.exp(0.5 * log_var) * eps
        return z
def test_seed_fn():
    idx = tensor.ivector()
    for new_seed, same in [(234, True), (None, True), (23, False)]:
        random = MRG_RandomStreams(234)
        fn1 = theano.function([], random.uniform((2, 2), dtype='float32'))
        fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                 dtype='float32'))
        fn3 = theano.function([idx], random.uniform(idx, nstreams=3, ndim=1,
                                                    dtype='float32'))

        fn1_val0 = fn1()
        fn1_val1 = fn1()
        assert not np.allclose(fn1_val0, fn1_val1)
        fn2_val0 = fn2()
        fn2_val1 = fn2()
        assert not np.allclose(fn2_val0, fn2_val1)
        fn3_val0 = fn3([4])
        fn3_val1 = fn3([4])
        assert not np.allclose(fn3_val0, fn3_val1)
        assert fn1_val0.size == 4
        assert fn2_val0.size == 9

        random.seed(new_seed)

        fn1_val2 = fn1()
        fn1_val3 = fn1()
        fn2_val2 = fn2()
        fn2_val3 = fn2()
        fn3_val2 = fn3([4])
        fn3_val3 = fn3([4])
        assert np.allclose(fn1_val0, fn1_val2) == same
        assert np.allclose(fn1_val1, fn1_val3) == same
        assert np.allclose(fn2_val0, fn2_val2) == same
        assert np.allclose(fn2_val1, fn2_val3) == same
        assert np.allclose(fn3_val0, fn3_val2) == same
        assert np.allclose(fn3_val1, fn3_val3) == same
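# Hedged illustration of what the test above asserts (a sketch, not taken
# from the original source): re-seeding an MRG_RandomStreams with the same
# seed (or with None, which falls back to the seed given at construction)
# replays the same stream, while a different seed produces a different one.
import numpy as np
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(234)
draw = theano.function([], srng.uniform((2, 2), dtype='float32'))

a = draw()
srng.seed(234)       # same seed -> same sequence
assert np.allclose(a, draw())
srng.seed(23)        # different seed -> different sequence
assert not np.allclose(a, draw())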
class SampleLayer(lasagne.layers.MergeLayer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.
    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma
        using the nonlinearity function as described below. Effectively this
        means that the nonlinearity function controls what log_var
        parametrizes. A few common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma
    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.
    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.
    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to
        transform it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """

    def __init__(self, mean, log_var,
                 eq_samples=1,
                 iw_samples=1,
                 nonlinearity=lambda x: T.exp(0.5 * x),
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SampleLayer, self).__init__([mean, log_var], **kwargs)
        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self.nonlinearity = nonlinearity
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes[0]
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        batch_size, num_latent = mu.shape
        if deterministic:
            z = mu.dimshuffle(0, 'x', 'x', 1) * T.ones(
                (batch_size, self.eq_samples, self.iw_samples, num_latent))
        else:
            eps = self._srng.normal(
                [batch_size, self.eq_samples, self.iw_samples, num_latent],
                dtype=theano.config.floatX)
            z = mu.dimshuffle(0, 'x', 'x', 1) + \
                self.nonlinearity(
                    log_var.dimshuffle(0, 'x', 'x', 1)) * eps
        return z.reshape((-1, num_latent))
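# Hedged usage sketch for the SampleLayer defined above (layer names and
# sizes are assumptions): with eq_samples=2 and iw_samples=5, every datapoint
# in the batch is expanded into 2*5 samples, so the flattened output has
# shape (batch_size * eq_samples * iw_samples, num_latent).
import lasagne

l_mu = lasagne.layers.InputLayer((None, 32))
l_log_var = lasagne.layers.InputLayer((None, 32))
l_z = SampleLayer(mean=l_mu, log_var=l_log_var, eq_samples=2, iw_samples=5)

# with a symbolic batch size this reports (None, 32); a fixed batch size B
# would give (B * 10, 32)
print(l_z.output_shape)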
class GaussianMade_SVI:
    """
    Implements a Made, where each conditional probability is modelled by a
    single gaussian component. This made is trained with stochastic
    variational inference, using the local reparameterization trick.

    References:
    Germain et al., "MADE: Masked Autoencoder for Distribution Estimation",
    ICML, 2015.
    Kingma et al., "Variational Dropout and the Local Reparameterization
    Trick", NIPS, 2015.
    """

    def __init__(self, n_inputs, n_hiddens, act_fun, input_order='sequential',
                 mode='sequential', input=None, rng=np.random):
        """
        Constructor.
        :param n_inputs: number of inputs
        :param n_hiddens: list with number of hidden units for each hidden layer
        :param act_fun: name of activation function
        :param input_order: order of inputs
        :param mode: strategy for assigning degrees to hidden nodes: can be
                     'random' or 'sequential'
        :param input: theano variable to serve as input; if None, a new
                      variable is created
        """

        # save input arguments
        self.n_inputs = n_inputs
        self.n_hiddens = n_hiddens
        self.act_fun = act_fun
        self.mode = mode

        # create network's parameters
        degrees = create_degrees(n_inputs, n_hiddens, input_order, mode, rng)
        Ms, Mmp = create_masks(degrees)
        mWs, mbs, sWs, sbs, mWm, mbm, sWm, sbm, mWp, mbp, sWp, sbp = \
            create_weights_SVI(n_inputs, n_hiddens, rng)
        self.mps = mWs + mbs + [mWm, mbm, mWp, mbp]
        self.sps = sWs + sbs + [sWm, sbm, sWp, sbp]
        self.parms = self.mps + self.sps
        self.input_order = degrees[0]

        self.srng = RandomStreams(rng.randint(2**30))

        # activation function
        f = util.ml.select_theano_act_function(act_fun, dtype)

        # input matrix
        self.input = tt.matrix('x', dtype=dtype) if input is None else input
        h = self.input
        uas = []

        # feedforward propagation
        for l, (M, mW, mb, sW, sb, N) in enumerate(
                izip(Ms, mWs, mbs, sWs, sbs, n_hiddens)):
            ma = tt.dot(h, M * mW) + mb
            sa = tt.dot(h**2, M * tt.exp(2*sW)) + tt.exp(2*sb)
            ua = self.srng.normal((h.shape[0], N), dtype=dtype)
            h = f(tt.sqrt(sa) * ua + ma)
            h.name = 'h' + str(l + 1)
            uas.append(ua)

        # output means
        mam = tt.dot(h, Mmp * mWm) + mbm
        sam = tt.dot(h**2, Mmp * tt.exp(2*sWm)) + tt.exp(2*sbm)
        uam = self.srng.normal((h.shape[0], n_inputs), dtype=dtype)
        self.m = tt.sqrt(sam) * uam + mam
        self.m.name = 'm'

        # output log precisions
        map = tt.dot(h, Mmp * mWp) + mbp
        sap = tt.dot(h**2, Mmp * tt.exp(2*sWp)) + tt.exp(2*sbp)
        uap = self.srng.normal((h.shape[0], n_inputs), dtype=dtype)
        self.logp = tt.sqrt(sap) * uap + map
        self.logp.name = 'logp'

        # random numbers driving made
        self.u = tt.exp(0.5 * self.logp) * (self.input - self.m)

        # log likelihoods
        self.L = -0.5 * (n_inputs * np.log(2 * np.pi) +
                         tt.sum(self.u ** 2 - self.logp, axis=1))
        self.L.name = 'L'

        # train objective
        self.trn_loss = -tt.mean(self.L)
        self.trn_loss.name = 'trn_loss'

        # collect all noise variables
        self.all_us = uas + [uam, uap]

        # theano evaluation functions, will be compiled when first needed
        self.eval_lprob_f = None
        self.eval_comps_f = None
        self.eval_lprob_f_rand = None
        self.eval_comps_f_rand = None
        self.eval_lprob_f_rand_const = None
        self.eval_comps_f_rand_const = None

    def reset_theano_functions(self):
        """
        Resets theano functions, so that they are compiled again when needed.
        """

        self.eval_lprob_f = None
        self.eval_comps_f = None
        self.eval_lprob_f_rand = None
        self.eval_comps_f_rand = None
        self.eval_lprob_f_rand_const = None
        self.eval_comps_f_rand_const = None

    def _create_constant_noise_across_datapoints(self, n_data):
        """
        Helper function. Creates and returns new theano variables representing
        noise, where noise is the same across datapoints in the minibatch.
        Useful for binding the original noise variables in an evaluation
        function where randomness is required but same predictions are needed
        across minibatch.
        """

        uas = [tt.tile(self.srng.normal((N,), dtype=dtype), [n_data, 1])
               for N in self.n_hiddens]
        uam = tt.tile(self.srng.normal((self.n_inputs,), dtype=dtype),
                      [n_data, 1])
        uap = tt.tile(self.srng.normal((self.n_inputs,), dtype=dtype),
                      [n_data, 1])

        return uas + [uam, uap]

    def _create_zero_noise(self, n_data):
        """
        Helper function. Creates and returns new theano variables representing
        zero noise. Useful for binding the original noise variables in an
        evaluation function where randomness is not required.
        """

        uas = [tt.zeros((n_data, N), dtype=dtype) for N in self.n_hiddens]
        uam = tt.zeros((n_data, self.n_inputs), dtype=dtype)
        uap = tt.zeros((n_data, self.n_inputs), dtype=dtype)

        return uas + [uam, uap]

    def eval(self, x, log=True, rand=False, const_noise=True):
        """
        Evaluate log probabilities for given inputs.
        :param x: data matrix where rows are inputs
        :param log: whether to return probabilities in the log domain
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same
                            across datapoints
        :return: list of log probabilities log p(x)
        """

        x = np.asarray(x, dtype=dtype)

        one_datapoint = x.ndim == 1
        x = x[np.newaxis, :] if one_datapoint else x

        if rand:

            if const_noise:

                # compile theano function, if haven't already done so
                if self.eval_lprob_f_rand_const is None:
                    n_data = tt.iscalar('n_data')
                    all_us = self._create_constant_noise_across_datapoints(n_data)
                    self.eval_lprob_f_rand_const = theano.function(
                        inputs=[self.input, n_data],
                        outputs=self.L,
                        givens=zip(self.all_us, all_us)
                    )

                lprob = self.eval_lprob_f_rand_const(x, x.shape[0])

            else:

                # compile theano function, if haven't already done so
                if self.eval_lprob_f_rand is None:
                    self.eval_lprob_f_rand = theano.function(
                        inputs=[self.input],
                        outputs=self.L
                    )

                lprob = self.eval_lprob_f_rand(x)

        else:

            # compile theano function, if haven't already done so
            if self.eval_lprob_f is None:
                n_data = tt.iscalar('n_data')
                all_us = self._create_zero_noise(n_data)
                self.eval_lprob_f = theano.function(
                    inputs=[self.input, n_data],
                    outputs=self.L,
                    givens=zip(self.all_us, all_us)
                )

            lprob = self.eval_lprob_f(x, x.shape[0])

        lprob = lprob[0] if one_datapoint else lprob

        return lprob if log else np.exp(lprob)

    def eval_comps(self, x, rand=False, const_noise=False):
        """
        Evaluate the parameters of all gaussians at given input locations.
        :param x: rows are input locations
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same
                            across datapoints
        :return: means and log precisions
        """

        x = np.asarray(x, dtype=dtype)

        one_datapoint = x.ndim == 1
        x = x[np.newaxis, :] if one_datapoint else x

        if rand:

            if const_noise:

                # compile theano function, if haven't already done so
                if self.eval_comps_f_rand_const is None:
                    n_data = tt.iscalar('n_data')
                    all_us = self._create_constant_noise_across_datapoints(n_data)
                    self.eval_comps_f_rand_const = theano.function(
                        inputs=[self.input, n_data],
                        outputs=[self.m, self.logp],
                        givens=zip(self.all_us, all_us)
                    )

                comps = self.eval_comps_f_rand_const(x, x.shape[0])

            else:

                # compile theano function, if haven't already done so
                if self.eval_comps_f_rand is None:
                    self.eval_comps_f_rand = theano.function(
                        inputs=[self.input],
                        outputs=[self.m, self.logp]
                    )

                comps = self.eval_comps_f_rand(x)

        else:

            # compile theano function, if haven't already done so
            if self.eval_comps_f is None:
                n_data = tt.iscalar('n_data')
                all_us = self._create_zero_noise(n_data)
                self.eval_comps_f = theano.function(
                    inputs=[self.input, n_data],
                    outputs=[self.m, self.logp],
                    givens=zip(self.all_us, all_us)
                )

            comps = self.eval_comps_f(x, x.shape[0])

        return map(lambda u: u[0], comps) if one_datapoint else comps

    def gen(self, n_samples=None, rand=False, const_noise=False, u=None,
            rng=np.random):
        """
        Generate samples from made. Requires as many evaluations as number of
        inputs.
        :param n_samples: number of samples, 1 if None
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same
                            across samples
        :param u: random numbers to use in generating samples; if None, new
                  random numbers are drawn
        :return: samples
        """

        if n_samples is None:
            return self.gen(1, rand, const_noise,
                            u if u is None else u[np.newaxis, :], rng)[0]

        x = np.zeros([n_samples, self.n_inputs], dtype=dtype)
        u = rng.randn(n_samples, self.n_inputs).astype(dtype) if u is None else u

        # seed for theano random stream
        seed = rng.randint(2**30)

        for i in xrange(1, self.n_inputs + 1):
            # need to have same activation noise in each pass
            self.srng.seed(seed)
            m, logp = self.eval_comps(x, rand=rand, const_noise=const_noise)
            idx = np.argwhere(self.input_order == i)[0, 0]
            x[:, idx] = m[:, idx] + \
                np.exp(np.minimum(-0.5 * logp[:, idx], 10.0)) * u[:, idx]

        return x

    def calc_random_numbers(self, x):
        raise NotImplementedError()
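# Hedged usage sketch for GaussianMade_SVI (argument values are assumptions;
# the module-level helpers create_degrees/create_masks/create_weights_SVI and
# the dtype constant used by the constructor are assumed to be importable
# from the surrounding module).
import numpy as np

made = GaussianMade_SVI(n_inputs=5, n_hiddens=[50, 50], act_fun='tanh',
                        rng=np.random.RandomState(42))

x = np.random.randn(100, 5)
logp = made.eval(x)                  # log p(x) with noise-free activations
logp_mc = made.eval(x, rand=True)    # one stochastic-activation estimate
samples = made.gen(n_samples=10)     # ancestral sampling, one pass per input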
def main(base_dir):
    """Implement Bayesian network and plot resultant parameter estimates."""

    # Set numpy and theano RNG seeds for consistency
    np.random.seed(123)
    th_rng = MRG_RandomStreams()
    th_rng.seed(123)

    # Set number of observations to use for parameter updating (for now,
    # assume model parameters are updated daily given 15 minute interval data
    # from all 24 hours)
    n_samples = 24 * 4
    # Set number of control choices
    n_choices = 3
    # Initialize test data
    io = ModelIO(th_rng, n_samples, n_choices)

    # Estimate Bayesian network model
    with pm.Model() as flex_bdn:

        # *** Temperature sub-model ***

        # Set parameter priors (betas, error)
        ta_params = pm.Normal('ta_params', 0, 20, shape=(io.X_temp.shape[1]))
        ta_sd = pm.Uniform('ta_sd', 0, 20)
        # Likelihood of temperature estimator
        ta_est = pm.math.dot(io.X_temp, ta_params)
        # Likelihood of temperature
        ta = pm.Normal('ta', mu=ta_est, sd=ta_sd, observed=io.Y_temp)

        # *** RH sub-model ***

        # Set parameter priors (betas, error)
        rh_params = pm.Normal('rh_params', 0, 20, shape=(io.X_hum.shape[1]))
        rh_sd = pm.Uniform('rh_sd', 0, 20)
        # Likelihood of humidity estimator
        rh_est = pm.math.dot(io.X_hum, rh_params)
        # Likelihood of humidity
        rh = pm.Normal('rh', mu=rh_est, sd=rh_sd, observed=io.Y_hum)

        # *** CO2 sub-model ***

        # Set parameter priors (betas, error)
        co2_params = pm.Normal('co2_params', 0, 20, shape=(io.X_co2.shape[1]))
        co2_sd = pm.Uniform('co2_sd', 0, 20)
        # Likelihood of CO2 estimator
        co2_est = pm.math.dot(io.X_co2, co2_params)
        # Likelihood of CO2
        co2 = pm.Normal('co2', mu=co2_est, sd=co2_sd, observed=io.Y_co2)

        # *** Lighting sub-model ***

        # Set parameter priors (betas, error)
        lt_params = pm.Normal('lt_params', 0, 20, shape=(io.X_lt.shape[1]))
        lt_sd = pm.Uniform('lt_sd', 0, 20)
        # Likelihood of lighting estimator
        lt_est = pm.math.dot(io.X_lt, lt_params)
        # Likelihood of lighting
        lt = pm.Normal('lt', mu=lt_est, sd=lt_sd, observed=io.Y_lt)

        # *** Demand sub-model ***

        # Set parameter priors (switch points, betas, error)
        # Switch points
        dmd_sp1 = pm.DiscreteUniform(
            'dmd_sp1', io.temp_out.min(), io.temp_out.max())
        dmd_sp2 = pm.DiscreteUniform('dmd_sp2', dmd_sp1, io.temp_out.max())
        # Betas
        dmd_params_c = pm.Normal(
            'dmd_params_c', 0, 20, shape=(3, io.X_dmd_c.shape[1]))
        dmd_params_nc = pm.Normal(
            'dmd_params_nc', 0, 20, shape=(io.X_dmd_nc.shape[1]))
        # Error
        dmd_sd = pm.Uniform('dmd_sd', 0, 20)
        # Likelihood of demand estimator
        dmd_est_c = pm.math.switch(
            dmd_sp1 >= io.temp_out,
            pm.math.dot(io.X_dmd_c, dmd_params_c[0]),
            pm.math.dot(io.X_dmd_c, dmd_params_c[1]))
        dmd_est = pm.math.switch(
            dmd_sp2 >= io.temp_out, dmd_est_c,
            pm.math.dot(io.X_dmd_c, dmd_params_c[2])) + pm.math.dot(
            io.X_dmd_nc, dmd_params_nc)
        # Likelihood of demand
        dmd = pm.Normal('dmd', mu=dmd_est, sd=dmd_sd, observed=io.Y_dmd)

        # *** Choice sub-model ***

        # X variables are the outputs of the above sub-models
        x_choice_bn = tt.tensor.stack(
            [ta, rh, co2, lt, io.plug_delta, dmd, io.intercept]).T
        # Set parameter priors (betas)
        choice_params = pm.Normal(
            'choice_params', mu=0, sd=10,
            shape=(x_choice_bn.shape.eval()[1], n_choices))
        # Softmax transformation of linear estimator into multinomial choice
        # probabilities
        logit_bn = pm.math.dot(x_choice_bn, choice_params)
        choice_probs_bn = tt.tensor.nnet.softmax(logit_bn)
        # Set choice probabilities as deterministic PyMC3 variable type
        p = pm.Deterministic('p', choice_probs_bn)
        # Likelihood of choice
        choice = pm.Categorical('choice', p=p, observed=io.Y_choice)

        # Draw posterior samples
        trace = pm.sample()

    # # Sample from the posterior predictive distribution
    # ppc = pm.sample_posterior_predictive(trace, samples=1, model=flex_bdn)
    # print(ppc)

    # Plot parameter traces for diagnostic purposes
    pm.traceplot(trace)
    plt.show()
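# Hedged note (an assumption, not taken from the original source): for fully
# reproducible posterior draws, the pm.sample() call inside main() above
# could pass an explicit seed to the sampler, e.g.
#
#     trace = pm.sample(draws=1000, tune=1000, random_seed=123)
#
# random_seed complements the numpy/theano seeding done at the top of main().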
class SampleLayer(lasagne.layers.MergeLayer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.
    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma
        using the nonlinearity function as described below. Effectively this
        means that the nonlinearity function controls what log_var
        parametrizes. A few common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma
    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.
    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.
    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to
        transform it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma
    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """

    def __init__(self, mean, log_var,
                 eq_samples=1,
                 iw_samples=1,
                 nonlinearity=lambda x: T.exp(0.5*x),
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SampleLayer, self).__init__([mean, log_var], **kwargs)
        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self.nonlinearity = nonlinearity
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes[0]
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu, log_var = input
        batch_size, num_latent = mu.shape
        eps = self._srng.normal(
            [batch_size, self.eq_samples, self.iw_samples, num_latent],
            dtype=theano.config.floatX)
        z = mu.dimshuffle(0, 'x', 'x', 1) + \
            self.nonlinearity(
                log_var.dimshuffle(0, 'x', 'x', 1)) * eps
        return z.reshape((-1, num_latent))