Example #1
class SimpleBernoulliSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing samples from bernoulli distributions.

    Parameters
    ----------
    mean : :class:`Layer` instances
          Parameterizing the mean value of each bernoulli distribution
    seed : int
        seed to random stream
    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """
    def __init__(self,
                 mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleBernoulliSampleLayer, self).__init__(mean, **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, deterministic=False, **kwargs):
        if deterministic:
            z = T.switch(mu >= 0.5, T.ones_like(mu), T.zeros_like(mu))
        else:
            z = self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
        return z
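A minimal usage sketch for the layer above (an illustration, not part of the original example). It assumes theano and lasagne are installed and that the SimpleBernoulliSampleLayer class defined above, along with its RandomStreams import, is in scope; the layer sizes and variable names are made up.

import theano
import theano.tensor as T
import lasagne

# Hypothetical wiring: a sigmoid DenseLayer provides the Bernoulli means.
x_sym = T.matrix('x')
l_in = lasagne.layers.InputLayer(shape=(None, 20), input_var=x_sym)
l_mean = lasagne.layers.DenseLayer(
    l_in, num_units=10, nonlinearity=lasagne.nonlinearities.sigmoid)
l_sample = SimpleBernoulliSampleLayer(l_mean)

# Stochastic 0/1 samples, and deterministic (mean >= 0.5) outputs.
z_sample = lasagne.layers.get_output(l_sample)
z_det = lasagne.layers.get_output(l_sample, deterministic=True)
f_sample = theano.function([x_sym], z_sample)
f_det = theano.function([x_sym], z_det)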
Example #2
class SimpleBernoulliSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing samples from bernoulli distributions.

    Parameters
    ----------
    mean : :class:`Layer` instances
          Parameterizing the mean value of each bernoulli distribution
    seed : int
        seed to random stream
    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """

    def __init__(self, mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleBernoulliSampleLayer, self).__init__(mean, **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, **kwargs):
        return self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
Example #3
class SimpleSampleLayer(L.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.
    """
    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        if deterministic:
            z = mu
        else:
            eps = self._srng.normal(mu.shape)
            z = mu + T.exp(0.5 * log_var) * eps
        return z
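A minimal encoder sketch for the layer above (illustrative only; the sizes and names are assumptions, and the SimpleSampleLayer class defined above is assumed to be in scope). Two linear heads parameterize mu and log_var of q(z|x), and the sample layer draws z = mu + exp(0.5 * log_var) * eps with eps ~ N(0, 1).

import theano
import theano.tensor as T
import lasagne

x_sym = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 784), input_var=x_sym)
l_hid = lasagne.layers.DenseLayer(l_in, num_units=128,
                                  nonlinearity=lasagne.nonlinearities.rectify)
l_mu = lasagne.layers.DenseLayer(l_hid, num_units=32,
                                 nonlinearity=lasagne.nonlinearities.identity)
l_log_var = lasagne.layers.DenseLayer(l_hid, num_units=32,
                                      nonlinearity=lasagne.nonlinearities.identity)
l_z = SimpleSampleLayer(l_mu, l_log_var)

# One Monte Carlo sample of z per datapoint.
z = lasagne.layers.get_output(l_z)
f_encode = theano.function([x_sym], z)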
Example #4
class BernoulliSampleLayer(lasagne.layers.Layer):
    """
    Bernoulli Sampling layer supporting importance sampling
    Parameters
    ----------
    mean : class:`Layer` instance
           Parameterizing the mean value of each bernoulli distribution
    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
    iw_samples : int or T.scalar
        Number of importance samples in the sum over k
    seed : int
        seed to random stream
    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """
    def __init__(self,
                 mean,
                 eq_samples=1,
                 iw_samples=1,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(BernoulliSampleLayer, self).__init__(mean, **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        batch_size, num_latent = input_shape
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu = input
        batch_size, num_latent = mu.shape
        shp = (batch_size, self.eq_samples, self.iw_samples, num_latent)
        mu_shp = mu.dimshuffle(0, 'x', 'x', 1)
        mu_shp = T.repeat(mu_shp, axis=1, repeats=self.eq_samples)
        mu_shp = T.repeat(mu_shp, axis=2, repeats=self.iw_samples)
        samples = self._srng.binomial(size=shp,
                                      p=mu_shp,
                                      dtype=theano.config.floatX)
        return samples.reshape((-1, num_latent))
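The repeat-and-reshape bookkeeping above is the subtle part. A small NumPy stand-in (not the Theano layer itself) showing how a (batch, latent) matrix of means is tiled so that every datapoint contributes eq_samples * iw_samples consecutive rows of the output:

import numpy as np

batch_size, num_latent = 2, 3
eq_samples, iw_samples = 2, 4
mu = np.random.rand(batch_size, num_latent)

mu_shp = mu[:, None, None, :]                        # (batch, 1, 1, latent)
mu_shp = np.repeat(mu_shp, eq_samples, axis=1)       # (batch, eq, 1, latent)
mu_shp = np.repeat(mu_shp, iw_samples, axis=2)       # (batch, eq, iw, latent)
samples = (np.random.rand(*mu_shp.shape) < mu_shp).astype('float32')
samples = samples.reshape(-1, num_latent)            # (batch*eq*iw, latent)
assert samples.shape == (batch_size * eq_samples * iw_samples, num_latent)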
Example #5
def test_seed_fn():
    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)
    idx = tensor.ivector()
    for use_cuda in test_use_cuda:
        if config.mode == 'FAST_COMPILE' and use_cuda:
            mode = 'FAST_RUN'
        else:
            mode = config.mode

        for new_seed, same in [(234, True), (None, True), (23, False)]:
            random = MRG_RandomStreams(234, use_cuda=use_cuda)
            fn1 = theano.function([],
                                  random.uniform((2, 2), dtype='float32'),
                                  mode=mode)
            fn2 = theano.function([],
                                  random.uniform((3, 3),
                                                 nstreams=2,
                                                 dtype='float32'),
                                  mode=mode)
            fn3 = theano.function([idx],
                                  random.uniform(idx,
                                                 nstreams=3,
                                                 ndim=1,
                                                 dtype='float32'),
                                  mode=mode)

            fn1_val0 = fn1()
            fn1_val1 = fn1()
            assert not numpy.allclose(fn1_val0, fn1_val1)
            fn2_val0 = fn2()
            fn2_val1 = fn2()
            assert not numpy.allclose(fn2_val0, fn2_val1)
            fn3_val0 = fn3([4])
            fn3_val1 = fn3([4])
            assert not numpy.allclose(fn3_val0, fn3_val1)
            assert fn1_val0.size == 4
            assert fn2_val0.size == 9

            random.seed(new_seed)

            fn1_val2 = fn1()
            fn1_val3 = fn1()
            fn2_val2 = fn2()
            fn2_val3 = fn2()
            fn3_val2 = fn3([4])
            fn3_val3 = fn3([4])
            assert numpy.allclose(fn1_val0, fn1_val2) == same
            assert numpy.allclose(fn1_val1, fn1_val3) == same
            assert numpy.allclose(fn2_val0, fn2_val2) == same
            assert numpy.allclose(fn2_val1, fn2_val3) == same
            assert numpy.allclose(fn3_val0, fn3_val2) == same
            assert numpy.allclose(fn3_val1, fn3_val3) == same
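The test above checks that re-seeding an MRG_RandomStreams with its original seed (or with None) reproduces the stream, while a new seed changes it. A condensed standalone sketch of the same behaviour, assuming only that Theano is installed:

import numpy
import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(234)
f = theano.function([], srng.uniform((2, 2), dtype='float32'))

first = f()
srng.seed(234)                        # restore the construction seed
assert numpy.allclose(f(), first)     # the stream restarts from the beginning
srng.seed(23)                         # a different seed changes the stream
assert not numpy.allclose(f(), first)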
Example #6
class NaiveCoulombShuffleLayer(lasagne.layers.Layer):
    """
    Assumes the input to be a minibatch of coulomb matrices [BATCH, 1, 29, 29] 
    The shuffling is as described in [MONTAVON]_.

    Parameters
    ----------
    coulomb: :class:`Layer` instances
        Parameterizing the coulomb matrix as described in 
        [MONTAVON]_. The code assumes that these have the
        same number of dimensions.

    seed : int
        seed to random stream

    axis : int
        the dimension to permute

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [MONTAVON] Grégoire Montavon et al 2013 New J. Phys. 15 095003 
            "Machine learning of molecular electronic properties in chemical compound space"
            http://iopscience.iop.org/article/10.1088/1367-2630/15/9/095003#citations.
    """
    def __init__(self,
                 coulomb,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 axis=2,
                 **kwargs):
        super(NaiveCoulombShuffleLayer, self).__init__(coulomb, **kwargs)
        self._srng = RandomStreams(seed)
        self.axis = axis

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        # The shape doesn't change; this layer only permutes rows and columns.
        return input_shape

    def get_output_for(self, input, **kwargs):
        coulomb = input
        # Coulomb matrices are symmetric, so either axis gives the same row norms.
        norm = coulomb.norm(2, axis=self.axis)
        # One random number per (batch, channel, row) perturbs the sorting key.
        eps = self._srng.normal(norm.shape)
        # Sort the rows by their noisy norm and apply the same permutation to
        # the rows and columns of each matrix in the minibatch.
        idxs = T.argsort(norm + eps, axis=-1)
        b = T.arange(coulomb.shape[0]).dimshuffle(0, 'x', 'x')
        c = T.arange(coulomb.shape[1]).dimshuffle('x', 0, 'x')
        z = coulomb[b, c, idxs]
        z = z.dimshuffle(0, 1, 3, 2)[b, c, idxs].dimshuffle(0, 1, 3, 2)
        return z
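A NumPy illustration of the noisy-norm shuffle performed by the layer above (illustrative sizes, not the Theano code): the rows are ordered by their norm plus Gaussian noise, and applying the same permutation to rows and columns leaves the matrix's eigenvalue spectrum unchanged.

import numpy as np

rng = np.random.RandomState(0)
C = rng.rand(29, 29)
C = 0.5 * (C + C.T)                    # symmetric stand-in for a Coulomb matrix

row_norms = np.linalg.norm(C, axis=1)
noisy_key = row_norms + rng.randn(29)  # perturb the sorting key with noise
perm = np.argsort(noisy_key)

C_shuffled = C[perm][:, perm]          # permute rows and columns consistently
assert np.allclose(np.linalg.eigvalsh(C), np.linalg.eigvalsh(C_shuffled))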
Example #7
class BernoulliSampleLayer(lasagne.layers.Layer):
    """
    Bernoulli Sampling layer supporting importance sampling
    Parameters
    ----------
    mean : class:`Layer` instance
           Parameterizing the mean value of each bernoulli distribution
    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
    iw_samples : int or T.scalar
        Number of importance samples in the sum over k
    seed : int
        seed to random stream
    Methods
    ----------
    seed : Helper function to change the random seed after init is called
    """

    def __init__(self, mean,
                 eq_samples=1,
                 iw_samples=1,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(BernoulliSampleLayer, self).__init__(mean, **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        batch_size, num_latent = input_shape
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size*self.eq_samples*self.iw_samples, num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu = input
        batch_size, num_latent = mu.shape
        shp = (batch_size, self.eq_samples, self.iw_samples, num_latent)
        mu_shp = mu.dimshuffle(0,'x','x',1)
        mu_shp = T.repeat(mu_shp, axis=1, repeats=self.eq_samples)
        mu_shp = T.repeat(mu_shp, axis=2, repeats=self.iw_samples)
        samples = self._srng.binomial(
            size=shp, p=mu_shp, dtype=theano.config.floatX)
        return samples.reshape((-1, num_latent))
Example #8
class SimpleConcreteSampleLayer(lasagne.layers.Layer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.

    Parameters
    ----------
    mu, log_var : :class:`Layer` instances
        Parameterizing the mean and log(variance) of the distribution to sample
        from as described in [KINGMA]_. The code assumes that these have the
        same number of dimensions.

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [KINGMA] Kingma, Diederik P., and Max Welling.
            "Auto-Encoding Variational Bayes."
            arXiv preprint arXiv:1312.6114 (2013).
    """
    def __init__(self,
                 logits,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleConcreteSampleLayer, self).__init__(logits, **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, input, deterministic=False, **kwargs):
        logits = input
        if deterministic:
            z = T.nnet.softmax(logits)
        else:
            shape = logits.shape
            U = self._srng.uniform(shape, dtype=theano.config.floatX)
            gumbel_sample = -T.log(-T.log(U + 1e-20) + 1e-20)
            y = logits + gumbel_sample
            # softmax with the temperature fixed at 1.0
            z = T.nnet.softmax(y / 1.0)
        return z
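A NumPy sketch of the Concrete/Gumbel-softmax draw performed in get_output_for above, with the temperature made explicit (the layer above fixes it at 1); the function name and defaults are illustrative.

import numpy as np

def gumbel_softmax_sample(logits, temperature=1.0, rng=np.random, eps=1e-20):
    # Gumbel(0, 1) noise via the inverse-CDF trick, as in the layer above.
    U = rng.uniform(size=logits.shape)
    gumbel = -np.log(-np.log(U + eps) + eps)
    y = (logits + gumbel) / temperature
    y = y - y.max(axis=-1, keepdims=True)      # numerically stable softmax
    e = np.exp(y)
    return e / e.sum(axis=-1, keepdims=True)

# Rows sum to one; lower temperatures concentrate the mass on the argmax.
z = gumbel_softmax_sample(np.array([[2.0, 0.5, -1.0]]), temperature=0.5)
assert np.allclose(z.sum(axis=-1), 1.0)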
Example #9
def test_seed_fn():
    test_use_cuda = [False]
    if cuda_available:
        test_use_cuda.append(True)
    idx = tensor.ivector()
    for use_cuda in test_use_cuda:
        if config.mode == 'FAST_COMPILE' and use_cuda:
            mode = 'FAST_RUN'
        else:
            mode = config.mode

        for new_seed, same in [(234, True), (None, True), (23, False)]:
            random = MRG_RandomStreams(234, use_cuda=use_cuda)
            fn1 = theano.function([], random.uniform((2, 2), dtype='float32'),
                                  mode=mode)
            fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                     dtype='float32'),
                                  mode=mode)
            fn3 = theano.function([idx],
                                  random.uniform(idx, nstreams=3, ndim=1,
                                                 dtype='float32'),
                                  mode=mode)

            fn1_val0 = fn1()
            fn1_val1 = fn1()
            assert not numpy.allclose(fn1_val0, fn1_val1)
            fn2_val0 = fn2()
            fn2_val1 = fn2()
            assert not numpy.allclose(fn2_val0, fn2_val1)
            fn3_val0 = fn3([4])
            fn3_val1 = fn3([4])
            assert not numpy.allclose(fn3_val0, fn3_val1)
            assert fn1_val0.size == 4
            assert fn2_val0.size == 9

            random.seed(new_seed)

            fn1_val2 = fn1()
            fn1_val3 = fn1()
            fn2_val2 = fn2()
            fn2_val3 = fn2()
            fn3_val2 = fn3([4])
            fn3_val3 = fn3([4])
            assert numpy.allclose(fn1_val0, fn1_val2) == same
            assert numpy.allclose(fn1_val1, fn1_val3) == same
            assert numpy.allclose(fn2_val0, fn2_val2) == same
            assert numpy.allclose(fn2_val1, fn2_val3) == same
            assert numpy.allclose(fn3_val0, fn3_val2) == same
            assert numpy.allclose(fn3_val1, fn3_val3) == same
Example #10
class SimpleSampleLayer(lasagne.layers.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.

    Parameters
    ----------
    mu, log_var : :class:`Layer` instances
        Parameterizing the mean and log(variance) of the distribution to sample
        from as described in [KINGMA]_. The code assumes that these have the
        same number of dimensions.

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [KINGMA] Kingma, Diederik P., and Max Welling.
            "Auto-Encoding Variational Bayes."
            arXiv preprint arXiv:1312.6114 (2013).
    """
    def __init__(self,
                 mean,
                 log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        if deterministic:
            z = mu
        else:
            eps = self._srng.normal(mu.shape, dtype=theano.config.floatX)
            z = mu + T.exp(0.5 * log_var) * eps
        return z
Example #11
class BernoulliSampleLayer(lasagne.layers.Layer):
    def __init__(self,
                 mean,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(BernoulliSampleLayer, self).__init__(mean, **kwargs)
        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shape):
        return input_shape

    def get_output_for(self, mu, **kwargs):
        return self._srng.binomial(size=mu.shape, p=mu, dtype=mu.dtype)
Example #12
class SimpleSampleLayer(lasagne.layers.MergeLayer):
    """
    Simple sampling layer drawing a single Monte Carlo sample to approximate
    E_q [log( p(x,z) / q(z|x) )]. This is the approach described in [KINGMA]_.

    Parameters
    ----------
    mu, log_var : :class:`Layer` instances
        Parameterizing the mean and log(variance) of the distribution to sample
        from as described in [KINGMA]_. The code assumes that these have the
        same number of dimensions.

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [KINGMA] Kingma, Diederik P., and Max Welling.
            "Auto-Encoding Variational Bayes."
            arXiv preprint arXiv:1312.6114 (2013).
    """
    def __init__(self, mean, log_var,
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SimpleSampleLayer, self).__init__([mean, log_var], **kwargs)

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        return input_shapes[0]

    def get_output_for(self, input, **kwargs):
        mu, log_var = input
        eps = self._srng.normal(mu.shape)
        z = mu + T.exp(0.5 * log_var) * eps
        return z
Example #13
def test_seed_fn():
    idx = tensor.ivector()

    for new_seed, same in [(234, True), (None, True), (23, False)]:
        random = MRG_RandomStreams(234)
        fn1 = theano.function([], random.uniform((2, 2), dtype='float32'))
        fn2 = theano.function([],
                              random.uniform((3, 3),
                                             nstreams=2,
                                             dtype='float32'))
        fn3 = theano.function([idx],
                              random.uniform(idx,
                                             nstreams=3,
                                             ndim=1,
                                             dtype='float32'))

        fn1_val0 = fn1()
        fn1_val1 = fn1()
        assert not np.allclose(fn1_val0, fn1_val1)
        fn2_val0 = fn2()
        fn2_val1 = fn2()
        assert not np.allclose(fn2_val0, fn2_val1)
        fn3_val0 = fn3([4])
        fn3_val1 = fn3([4])
        assert not np.allclose(fn3_val0, fn3_val1)
        assert fn1_val0.size == 4
        assert fn2_val0.size == 9

        random.seed(new_seed)

        fn1_val2 = fn1()
        fn1_val3 = fn1()
        fn2_val2 = fn2()
        fn2_val3 = fn2()
        fn3_val2 = fn3([4])
        fn3_val3 = fn3([4])
        assert np.allclose(fn1_val0, fn1_val2) == same
        assert np.allclose(fn1_val1, fn1_val3) == same
        assert np.allclose(fn2_val0, fn2_val2) == same
        assert np.allclose(fn2_val1, fn2_val3) == same
        assert np.allclose(fn3_val0, fn3_val2) == same
        assert np.allclose(fn3_val1, fn3_val3) == same
Example #14
def test_seed_fn():
    idx = tensor.ivector()

    for new_seed, same in [(234, True), (None, True), (23, False)]:
        random = MRG_RandomStreams(234)
        fn1 = theano.function([], random.uniform((2, 2), dtype='float32'))
        fn2 = theano.function([], random.uniform((3, 3), nstreams=2,
                                                 dtype='float32'))
        fn3 = theano.function([idx],
                              random.uniform(idx, nstreams=3, ndim=1,
                                             dtype='float32'))

        fn1_val0 = fn1()
        fn1_val1 = fn1()
        assert not np.allclose(fn1_val0, fn1_val1)
        fn2_val0 = fn2()
        fn2_val1 = fn2()
        assert not np.allclose(fn2_val0, fn2_val1)
        fn3_val0 = fn3([4])
        fn3_val1 = fn3([4])
        assert not np.allclose(fn3_val0, fn3_val1)
        assert fn1_val0.size == 4
        assert fn2_val0.size == 9

        random.seed(new_seed)

        fn1_val2 = fn1()
        fn1_val3 = fn1()
        fn2_val2 = fn2()
        fn2_val3 = fn2()
        fn3_val2 = fn3([4])
        fn3_val3 = fn3([4])
        assert np.allclose(fn1_val0, fn1_val2) == same
        assert np.allclose(fn1_val1, fn1_val3) == same
        assert np.allclose(fn2_val0, fn2_val2) == same
        assert np.allclose(fn2_val1, fn2_val3) == same
        assert np.allclose(fn3_val0, fn3_val2) == same
        assert np.allclose(fn3_val1, fn3_val3) == same
Example #15
class SampleLayer(lasagne.layers.MergeLayer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.

    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma using
        the nonlinearity function as described below. Effectively this means
        that the nonlinearity function controls what log_var parametrizes. A few
        common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma

    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.

    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.

    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to transform
        it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """
    def __init__(self,
                 mean,
                 log_var,
                 eq_samples=1,
                 iw_samples=1,
                 nonlinearity=lambda x: T.exp(0.5 * x),
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SampleLayer, self).__init__([mean, log_var], **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self.nonlinearity = nonlinearity

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes[0]
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size * self.eq_samples * self.iw_samples,
                       num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, deterministic=False, **kwargs):
        mu, log_var = input
        batch_size, num_latent = mu.shape

        if deterministic:
            z = mu.dimshuffle(0, 'x', 'x', 1) * T.ones(
                (batch_size, self.eq_samples, self.iw_samples, num_latent))
        else:
            eps = self._srng.normal(
                [batch_size, self.eq_samples, self.iw_samples, num_latent],
                dtype=theano.config.floatX)

            z = mu.dimshuffle(0,'x','x',1) + \
                self.nonlinearity( log_var.dimshuffle(0,'x','x',1)) * eps

        return z.reshape((-1, num_latent))
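A minimal usage sketch (layer sizes and names are assumptions, and the SampleLayer class above is assumed to be in scope): each input row is expanded into eq_samples * iw_samples latent draws, so a batch of 5 yields 5 * 2 * 3 = 30 output rows.

import numpy as np
import theano
import theano.tensor as T
import lasagne

x_sym = T.matrix('x')
l_in = lasagne.layers.InputLayer((None, 784), input_var=x_sym)
l_mu = lasagne.layers.DenseLayer(l_in, num_units=16,
                                 nonlinearity=lasagne.nonlinearities.identity)
l_log_var = lasagne.layers.DenseLayer(l_in, num_units=16,
                                      nonlinearity=lasagne.nonlinearities.identity)
l_z = SampleLayer(l_mu, l_log_var, eq_samples=2, iw_samples=3)

z = lasagne.layers.get_output(l_z)
f = theano.function([x_sym], z)
out = f(np.zeros((5, 784), dtype=theano.config.floatX))
assert out.shape == (5 * 2 * 3, 16)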
Example #16
class GaussianMade_SVI:
    """
    Implements a Made, where each conditional probability is modelled by a single gaussian component.
    This made is trained with stochastic variational inference, using the local reparameterization trick.
    References:
    Germain et al., "MADE: Masked Autoencoder for Distribution Estimation", ICML, 2015.
    Kingma et al., "Variational Dropout and the Local Reparameterization Trick", NIPS, 2015.
    """

    def __init__(self, n_inputs, n_hiddens, act_fun, input_order='sequential', mode='sequential', input=None, rng=np.random):
        """
        Constructor.
        :param n_inputs: number of inputs
        :param n_hiddens: list with number of hidden units for each hidden layer
        :param act_fun: name of activation function
        :param input_order: order of inputs
        :param mode: strategy for assigning degrees to hidden nodes: can be 'random' or 'sequential'
        :param input: theano variable to serve as input; if None, a new variable is created
        :param rng: numpy random number generator used to create degrees and weights and to seed the theano random stream
        """

        # save input arguments
        self.n_inputs = n_inputs
        self.n_hiddens = n_hiddens
        self.act_fun = act_fun
        self.mode = mode

        # create network's parameters
        degrees = create_degrees(n_inputs, n_hiddens, input_order, mode, rng)
        Ms, Mmp = create_masks(degrees)
        mWs, mbs, sWs, sbs, mWm, mbm, sWm, sbm, mWp, mbp, sWp, sbp = create_weights_SVI(n_inputs, n_hiddens, rng)
        self.mps = mWs + mbs + [mWm, mbm, mWp, mbp]
        self.sps = sWs + sbs + [sWm, sbm, sWp, sbp]
        self.parms = self.mps + self.sps
        self.input_order = degrees[0]
        self.srng = RandomStreams(rng.randint(2**30))

        # activation function
        f = util.ml.select_theano_act_function(act_fun, dtype)

        # input matrix
        self.input = tt.matrix('x', dtype=dtype) if input is None else input
        h = self.input
        uas = []

        # feedforward propagation
        for l, (M, mW, mb, sW, sb, N) in enumerate(izip(Ms, mWs, mbs, sWs, sbs, n_hiddens)):
            ma = tt.dot(h, M * mW) + mb
            sa = tt.dot(h**2, M * tt.exp(2*sW)) + tt.exp(2*sb)
            ua = self.srng.normal((h.shape[0], N), dtype=dtype)
            h = f(tt.sqrt(sa) * ua + ma)
            h.name = 'h' + str(l + 1)
            uas.append(ua)

        # output means
        mam = tt.dot(h, Mmp * mWm) + mbm
        sam = tt.dot(h**2, Mmp * tt.exp(2*sWm)) + tt.exp(2*sbm)
        uam = self.srng.normal((h.shape[0], n_inputs), dtype=dtype)
        self.m = tt.sqrt(sam) * uam + mam
        self.m.name = 'm'

        # output log precisions
        map = tt.dot(h, Mmp * mWp) + mbp
        sap = tt.dot(h**2, Mmp * tt.exp(2*sWp)) + tt.exp(2*sbp)
        uap = self.srng.normal((h.shape[0], n_inputs), dtype=dtype)
        self.logp = tt.sqrt(sap) * uap + map
        self.logp.name = 'logp'

        # random numbers driving made
        self.u = tt.exp(0.5 * self.logp) * (self.input - self.m)

        # log likelihoods
        self.L = -0.5 * (n_inputs * np.log(2 * np.pi) + tt.sum(self.u ** 2 - self.logp, axis=1))
        self.L.name = 'L'

        # train objective
        self.trn_loss = -tt.mean(self.L)
        self.trn_loss.name = 'trn_loss'

        # collect all noise variables
        self.all_us = uas + [uam, uap]

        # theano evaluation functions, will be compiled when first needed
        self.eval_lprob_f = None
        self.eval_comps_f = None
        self.eval_lprob_f_rand = None
        self.eval_comps_f_rand = None
        self.eval_lprob_f_rand_const = None
        self.eval_comps_f_rand_const = None

    def reset_theano_functions(self):
        """
        Resets theano functions, so that they are compiled again when needed.
        """

        self.eval_lprob_f = None
        self.eval_comps_f = None
        self.eval_lprob_f_rand = None
        self.eval_comps_f_rand = None
        self.eval_lprob_f_rand_const = None
        self.eval_comps_f_rand_const = None

    def _create_constant_noise_across_datapoints(self, n_data):
        """
        Helper function. Creates and returns new theano variables representing noise, where noise is the same across
        datapoints in the minibatch. Useful for binding the original noise variables in an evaluation function where
        randomness is required but same predictions are needed across minibatch.
        """

        uas = [tt.tile(self.srng.normal((N,), dtype=dtype), [n_data, 1]) for N in self.n_hiddens]
        uam = tt.tile(self.srng.normal((self.n_inputs,), dtype=dtype), [n_data, 1])
        uap = tt.tile(self.srng.normal((self.n_inputs,), dtype=dtype), [n_data, 1])

        return uas + [uam, uap]

    def _create_zero_noise(self, n_data):
        """
        Helper function. Creates and returns new theano variables representing zero noise. Useful for binding the
        original noise variables in an evaluation function where randomness is not required.
        """

        uas = [tt.zeros((n_data, N), dtype=dtype) for N in self.n_hiddens]
        uam = tt.zeros((n_data, self.n_inputs), dtype=dtype)
        uap = tt.zeros((n_data, self.n_inputs), dtype=dtype)

        return uas + [uam, uap]

    def eval(self, x, log=True, rand=False, const_noise=True):
        """
        Evaluate log probabilities for given inputs.
        :param x: data matrix where rows are inputs
        :param log: whether to return probabilities in the log domain
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same across datapoints
        :return: list of log probabilities log p(x)
        """

        x = np.asarray(x, dtype=dtype)
        one_datapoint = x.ndim == 1
        x = x[np.newaxis, :] if one_datapoint else x

        if rand:

            if const_noise:

                # compile theano function, if haven't already done so
                if self.eval_lprob_f_rand_const is None:

                    n_data = tt.iscalar('n_data')
                    all_us = self._create_constant_noise_across_datapoints(n_data)

                    self.eval_lprob_f_rand_const = theano.function(
                        inputs=[self.input, n_data],
                        outputs=self.L,
                        givens=zip(self.all_us, all_us)
                    )

                lprob = self.eval_lprob_f_rand_const(x, x.shape[0])

            else:

                # compile theano function, if haven't already done so
                if self.eval_lprob_f_rand is None:
                    self.eval_lprob_f_rand = theano.function(
                        inputs=[self.input],
                        outputs=self.L
                    )

                lprob = self.eval_lprob_f_rand(x)

        else:

            # compile theano function, if haven't already done so
            if self.eval_lprob_f is None:

                n_data = tt.iscalar('n_data')
                all_us = self._create_zero_noise(n_data)

                self.eval_lprob_f = theano.function(
                    inputs=[self.input, n_data],
                    outputs=self.L,
                    givens=zip(self.all_us, all_us)
                )

            lprob = self.eval_lprob_f(x, x.shape[0])

        lprob = lprob[0] if one_datapoint else lprob

        return lprob if log else np.exp(lprob)

    def eval_comps(self, x, rand=False, const_noise=False):
        """
        Evaluate the parameters of all gaussians at given input locations.
        :param x: rows are input locations
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same across datapoints
        :return: means and log precisions
        """

        x = np.asarray(x, dtype=dtype)
        one_datapoint = x.ndim == 1
        x = x[np.newaxis, :] if one_datapoint else x

        if rand:

            if const_noise:

                # compile theano function, if haven't already done so
                if self.eval_comps_f_rand_const is None:

                    n_data = tt.iscalar('n_data')
                    all_us = self._create_constant_noise_across_datapoints(n_data)

                    self.eval_comps_f_rand_const = theano.function(
                        inputs=[self.input, n_data],
                        outputs=[self.m, self.logp],
                        givens=zip(self.all_us, all_us)
                    )

                comps = self.eval_comps_f_rand_const(x, x.shape[0])

            else:

                # compile theano function, if haven't already done so
                if self.eval_comps_f_rand is None:
                    self.eval_comps_f_rand = theano.function(
                        inputs=[self.input],
                        outputs=[self.m, self.logp]
                    )

                comps = self.eval_comps_f_rand(x)

        else:

            # compile theano function, if haven't already done so
            if self.eval_comps_f is None:

                n_data = tt.iscalar('n_data')
                all_us = self._create_zero_noise(n_data)

                self.eval_comps_f = theano.function(
                    inputs=[self.input, n_data],
                    outputs=[self.m, self.logp],
                    givens=zip(self.all_us, all_us)
                )

            comps = self.eval_comps_f(x, x.shape[0])

        return map(lambda u: u[0], comps) if one_datapoint else comps

    def gen(self, n_samples=None, rand=False, const_noise=False, u=None, rng=np.random):
        """
        Generate samples from the MADE. Requires as many evaluations as there are inputs.
        :param n_samples: number of samples, 1 if None
        :param rand: whether to inject randomness to the activations
        :param const_noise: whether the injected randomness is the same across samples
        :param u: random numbers to use in generating samples; if None, new random numbers are drawn
        :return: samples
        """

        if n_samples is None:
            return self.gen(1, rand, const_noise, u if u is None else u[np.newaxis, :], rng)[0]

        x = np.zeros([n_samples, self.n_inputs], dtype=dtype)
        u = rng.randn(n_samples, self.n_inputs).astype(dtype) if u is None else u

        # seed for theano random stream
        seed = rng.randint(2**30)

        for i in xrange(1, self.n_inputs + 1):
            self.srng.seed(seed)  # need to have same activation noise in each pass
            m, logp = self.eval_comps(x, rand=rand, const_noise=const_noise)
            idx = np.argwhere(self.input_order == i)[0, 0]
            x[:, idx] = m[:, idx] + np.exp(np.minimum(-0.5 * logp[:, idx], 10.0)) * u[:, idx]

        return x

    def calc_random_numbers(self, x):
        raise NotImplementedError()
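The per-layer sampling in the constructor above is the local reparameterization trick: instead of sampling masked weights W ~ N(mW, exp(2*sW)) and biases b ~ N(mb, exp(2*sb)), it samples the pre-activation from a Gaussian with mean h.(M*mW) + mb and variance h^2.(M*exp(2*sW)) + exp(2*sb). A NumPy sketch of that single step, with illustrative shapes:

import numpy as np

rng = np.random.RandomState(0)
n_in, n_out, n_data = 5, 4, 3

h = rng.randn(n_data, n_in)            # previous-layer activations
M = (rng.rand(n_in, n_out) > 0.5)      # stand-in for a MADE mask
mW, sW = 0.1 * rng.randn(n_in, n_out), 0.1 * rng.randn(n_in, n_out)
mb, sb = 0.1 * rng.randn(n_out), 0.1 * rng.randn(n_out)

ma = h.dot(M * mW) + mb                                   # pre-activation mean
sa = (h ** 2).dot(M * np.exp(2 * sW)) + np.exp(2 * sb)    # pre-activation variance
ua = rng.randn(n_data, n_out)                             # unit Gaussian noise
a = np.sqrt(sa) * ua + ma                                 # one noisy pre-activation draw
assert a.shape == (n_data, n_out)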
Example #17
def main(base_dir):
    """Implement Bayesian network and plot resultant parameter estimates."""
    # Set numpy and theano RNG seeds for consistency
    np.random.seed(123)
    th_rng = MRG_RandomStreams()
    th_rng.seed(123)

    # Set number of observations to use for parameter updating (for now,
    # assume model parameters are updated daily given 15 minute interval data
    # from all 24 hours)
    n_samples = 24 * 4
    # Set number of control choices
    n_choices = 3

    # Initialize test data
    io = ModelIO(th_rng, n_samples, n_choices)

    # Estimate Bayesian network model
    with pm.Model() as flex_bdn:

        # *** Temperature sub-model ***

        # Set parameter priors (betas, error)
        ta_params = pm.Normal('ta_params', 0, 20, shape=(io.X_temp.shape[1]))
        ta_sd = pm.Uniform('ta_sd', 0, 20)
        # Likelihood of temperature estimator
        ta_est = pm.math.dot(io.X_temp, ta_params)
        # Likelihood of temperature
        ta = pm.Normal('ta', mu=ta_est, sd=ta_sd, observed=io.Y_temp)

        # *** RH sub-model ***

        # Set parameter priors (betas, error)
        rh_params = pm.Normal('rh_params', 0, 20, shape=(io.X_hum.shape[1]))
        rh_sd = pm.Uniform('rh_sd', 0, 20)
        # Likelihood of humidity estimator
        rh_est = pm.math.dot(io.X_hum, rh_params)
        # Likelihood of humidity
        rh = pm.Normal('rh', mu=rh_est, sd=rh_sd, observed=io.Y_hum)

        # *** CO2 sub-model ***

        # Set parameter priors (betas, error)
        co2_params = pm.Normal('co2_params', 0, 20, shape=(io.X_co2.shape[1]))
        co2_sd = pm.Uniform('co2_sd', 0, 20)
        # Likelihood of CO2 estimator
        co2_est = pm.math.dot(io.X_co2, co2_params)
        # Likelihood of CO2
        co2 = pm.Normal('co2', mu=co2_est, sd=co2_sd, observed=io.Y_co2)

        # *** Lighting sub-model ***

        # Set parameter priors (betas, error)
        lt_params = pm.Normal('lt_params', 0, 20, shape=(io.X_lt.shape[1]))
        lt_sd = pm.Uniform('lt_sd', 0, 20)
        # Likelihood of lighting estimator
        lt_est = pm.math.dot(io.X_lt, lt_params)
        # Likelihood of lighting
        lt = pm.Normal('lt', mu=lt_est, sd=lt_sd, observed=io.Y_lt)

        # *** Demand sub-model ***

        # Set parameter priors (switch points, betas, error)
        # Switch points
        dmd_sp1 = pm.DiscreteUniform('dmd_sp1', io.temp_out.min(),
                                     io.temp_out.max())
        dmd_sp2 = pm.DiscreteUniform('dmd_sp2', dmd_sp1, io.temp_out.max())
        # Betas
        dmd_params_c = pm.Normal('dmd_params_c',
                                 0,
                                 20,
                                 shape=(3, io.X_dmd_c.shape[1]))
        dmd_params_nc = pm.Normal('dmd_params_nc',
                                  0,
                                  20,
                                  shape=(io.X_dmd_nc.shape[1]))
        # Error
        dmd_sd = pm.Uniform('dmd_sd', 0, 20)
        # Likelihood of demand estimator
        dmd_est_c = pm.math.switch(dmd_sp1 >= io.temp_out,
                                   pm.math.dot(io.X_dmd_c, dmd_params_c[0]),
                                   pm.math.dot(io.X_dmd_c, dmd_params_c[1]))
        dmd_est = pm.math.switch(dmd_sp2 >= io.temp_out, dmd_est_c,
                                 pm.math.dot(io.X_dmd_c,
                                             dmd_params_c[2])) + pm.math.dot(
                                                 io.X_dmd_nc, dmd_params_nc)
        # Likelihood of demand
        dmd = pm.Normal('dmd', mu=dmd_est, sd=dmd_sd, observed=io.Y_dmd)

        # *** Choice sub-model ***

        # X variables are the outputs of the above sub-models
        x_choice_bn = tt.tensor.stack(
            [ta, rh, co2, lt, io.plug_delta, dmd, io.intercept]).T
        # Set parameter priors (betas)
        choice_params = pm.Normal('choice_params',
                                  mu=0,
                                  sd=10,
                                  shape=(x_choice_bn.shape.eval()[1],
                                         n_choices))
        # Softmax transformation of linear estimator into multinomial choice
        # probabilities
        logit_bn = pm.math.dot(x_choice_bn, choice_params)
        choice_probs_bn = tt.tensor.nnet.softmax(logit_bn)
        # Set choice probabilities as deterministic PyMC3 variable type
        p = pm.Deterministic('p', choice_probs_bn)
        # Likelihood of choice
        choice = pm.Categorical('choice', p=p, observed=io.Y_choice)

        # Draw posterior samples
        trace = pm.sample()

    # # Sample from the posterior predictive distribution
    # ppc = pm.sample_posterior_predictive(trace, samples=1, model=flex_bdn)
    # print(ppc)
    # Plot parameter traces for diagnostic purposes
    pm.traceplot(trace)
    plt.show()
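A condensed PyMC3 sketch of the choice sub-model pattern used above: a matrix of regression coefficients, a softmax over the linear predictor, and a Categorical likelihood. The synthetic data, names, and sampler settings are illustrative and stand in for the original ModelIO inputs.

import numpy as np
import pymc3 as pm
import theano.tensor as T

rng = np.random.RandomState(123)
n_obs, n_feats, n_choices = 96, 4, 3
X = rng.randn(n_obs, n_feats)
y = rng.randint(n_choices, size=n_obs)

with pm.Model() as choice_model:
    # Priors on the per-choice regression coefficients
    beta = pm.Normal('beta', mu=0, sd=10, shape=(n_feats, n_choices))
    # Softmax transformation of the linear estimator into choice probabilities
    logits = pm.math.dot(X, beta)
    p = pm.Deterministic('p', T.nnet.softmax(logits))
    # Likelihood of the observed choices
    choice = pm.Categorical('choice', p=p, observed=y)
    trace = pm.sample(draws=500, tune=500, chains=2)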
Example #18
class SampleLayer(lasagne.layers.MergeLayer):
    """
    Sampling layer supporting importance sampling as described in [BURDA]_ and
    multiple Monte Carlo samples for the approximation of
    E_q [log( p(x,z) / q(z|x) )].

    Parameters
    ----------
    mu : class:`Layer` instance
        Parameterizing the mean of the distribution to sample
        from as described in [BURDA]_.

    log_var : class:`Layer` instance
        By default assumed to parametrize log(sigma^2) of the distribution to
        sample from as described in [BURDA]_ which is transformed to sigma using
        the nonlinearity function as described below. Effectively this means
        that the nonlinearity function controls what log_var parametrizes. A few
        common examples:
        -nonlinearity = lambda x: T.exp(0.5*x) => log_var = log(sigma^2)[default]
        -nonlinearity = lambda x: T.sqrt(x) => log_var = sigma^2
        -nonlinearity = lambda x: x => log_var = sigma

    eq_samples : int or T.scalar
        Number of Monte Carlo samples used to estimate the expectation over
        q(z|x) in eq. (8) in [BURDA]_.

    iw_samples : int or T.scalar
        Number of importance samples in the sum over k in eq. (8) in [BURDA]_.

    nonlinearity : callable or None
        The nonlinearity that is applied to the log_var input layer to transform
        it into a standard deviation. By default we assume that
        log_var = log(sigma^2) and hence the corresponding nonlinearity is
        f(x) = T.exp(0.5*x) such that T.exp(0.5*log(sigma^2)) = sigma

    seed : int
        seed to random stream

    Methods
    ----------
    seed : Helper function to change the random seed after init is called

    References
    ----------
        ..  [BURDA] Burda, Yuri, Roger Grosse, and Ruslan Salakhutdinov.
            "Importance Weighted Autoencoders."
            arXiv preprint arXiv:1509.00519 (2015).
    """

    def __init__(self, mean, log_var,
                 eq_samples=1,
                 iw_samples=1,
                 nonlinearity=lambda x: T.exp(0.5*x),
                 seed=lasagne.random.get_rng().randint(1, 2147462579),
                 **kwargs):
        super(SampleLayer, self).__init__([mean, log_var], **kwargs)

        self.eq_samples = eq_samples
        self.iw_samples = iw_samples
        self.nonlinearity = nonlinearity

        self._srng = RandomStreams(seed)

    def seed(self, seed=lasagne.random.get_rng().randint(1, 2147462579)):
        self._srng.seed(seed)

    def get_output_shape_for(self, input_shapes):
        batch_size, num_latent = input_shapes[0]
        if isinstance(batch_size, int) and \
           isinstance(self.iw_samples, int) and \
           isinstance(self.eq_samples, int):
            out_dim = (batch_size*self.eq_samples*self.iw_samples, num_latent)
        else:
            out_dim = (None, num_latent)
        return out_dim

    def get_output_for(self, input, **kwargs):
        mu, log_var = input
        batch_size, num_latent = mu.shape
        eps = self._srng.normal(
            [batch_size, self.eq_samples, self.iw_samples, num_latent],
             dtype=theano.config.floatX)

        z = mu.dimshuffle(0,'x','x',1) + \
            self.nonlinearity( log_var.dimshuffle(0,'x','x',1)) * eps

        return z.reshape((-1,num_latent))