コード例 #1
0
def test_dirichlet():
    """Check the output shape of a batch of Dirichlet draws.

    Five draws are requested from a Dirichlet whose ``alpha`` is a length-10
    vector of ones; the compiled function must return an array of shape
    ``(5, 10)``.
    """
    streams = RandomStreams(234)
    draws = streams.dirichlet(alpha=numpy.ones(10), draw_shape=(5,))

    # Compile a zero-argument function that evaluates the symbolic draws.
    sample_fn = theano.function([], draws)

    result = sample_fn()
    assert result.shape == (5, 10)
コード例 #2
0
def test_dirichlet():
    """Verify that ``draw_shape=(5,)`` with a 10-element ``alpha`` yields a
    ``(5, 10)`` array once the graph is compiled and called."""
    rng = RandomStreams(234)
    concentration = numpy.ones(10)
    dirichlet_rv = rng.dirichlet(alpha=concentration, draw_shape=(5,))

    compiled = theano.function([], dirichlet_rv)
    drawn = compiled()

    assert drawn.shape == (5, 10)
コード例 #3
0
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

s_rng = RandomStreams(123)

# Model sizes and the two concentration hyper-parameters.
nr_words = 4
nr_topics = 2
alpha = 0.8
beta = 1.


@memoized
def doc_mixture(doc_id):
    """Topic distribution of one document: a Dirichlet draw whose
    concentration is ``alpha / nr_topics`` repeated ``nr_topics`` times."""
    return s_rng.dirichlet([alpha / nr_topics] * nr_topics)


@memoized
def topic_mixture(top_id):
    """Word distribution of one topic: a Dirichlet draw whose concentration
    is ``beta / nr_words`` repeated ``nr_words`` times."""
    return s_rng.dirichlet([beta / nr_words] * nr_words)


# TODO: the intended model draws each word's topic from a multinomial over
# doc_mixture(doc_id) with draw_shape=(nr,). Because of the limitation
# described above word_topic_mapper below, a binomial on the first mixture
# component is used instead.
@memoized
def topics(doc_id, nr):
    """Draw ``nr`` binary topic indicators for document ``doc_id``.

    The binomial is parameterised with ``doc_mixture(doc_id)[0]``.
    """
    return s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))


def word_topic(top_id):
    """Draw a word for topic ``top_id`` (multinomial over its word mixture).

    Not wrapped in ``memoized``.
    """
    return s_rng.multinomial(1, topic_mixture(top_id))


# TODO: memoized only works on the pre-compiled graph. That makes it fail
# when a vector of topics has to be mapped to individual multinomials whose
# priors are the different topics. With exactly two topics this can be
# hacked around by using a binomial topic distribution and a switch:
def word_topic_mapper(top_id):
    """Select ``word_topic(0)`` where ``top_id`` is nonzero, else
    ``word_topic(1)``."""
    return tensor.switch(top_id, word_topic(0), word_topic(1))
コード例 #4
0
import numpy, pylab
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from rv import full_log_likelihood

# Two-component mixture example: sample the mixing weight, both means and the
# spread parameter given four observed data points.
s_rng = RandomStreams(3424)

# Mixing weight: first component of a Dirichlet draw with alpha = [1, 1].
p = s_rng.dirichlet(numpy.asarray([1, 1]))[0]
# Component means, each uniform on [-5, 5].
m1 = s_rng.uniform(low=-5, high=5)
m2 = s_rng.uniform(low=-5, high=5)
# Second argument of the normal below, uniform on [0, 1].
v = s_rng.uniform(low=0, high=1)

# Four binary component indicators; the switch picks m1 where the indicator
# is nonzero and m2 otherwise.
C = s_rng.binomial(1, p, draw_shape=(4,))
m = tensor.switch(C, m1, m2)
D = s_rng.normal(m, v, draw_shape=(4,))

# Observed values that D is conditioned on.
D_data = numpy.asarray([1, 1.2, 3, 3.4], dtype=theano.config.floatX)

givens = {D: D_data}
sampler = mh2_sample(s_rng, [p, m1, m2, v], givens)

# NOTE(review): positional arguments; presumably (nr_samples, burnin, lag) --
# confirm against mh2_sample.
samples = sampler(200, 1000, 100)

# Print the mean of each sampled variable (p, m1, m2, v) on one line.
# A single-argument print(...) is used so the line is valid on both Python 2
# and Python 3 and emits the same space-separated output as the former
# Python 2 print statement.
print(" ".join(str(samples[i].mean()) for i in range(4)))
コード例 #5
0
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

s_rng = RandomStreams(123)

# Vocabulary size, number of topics and the two concentration parameters.
nr_words = 4
nr_topics = 2
alpha = 0.8
beta = 1.


# Topic distribution per document.
@memoized
def doc_mixture(doc_id):
    """Return a Dirichlet draw with concentration
    ``[alpha / nr_topics] * nr_topics`` for document ``doc_id``."""
    concentration = [alpha / nr_topics] * nr_topics
    return s_rng.dirichlet(concentration)


# Word distribution per topic.
@memoized
def topic_mixture(top_id):
    """Return a Dirichlet draw with concentration
    ``[beta / nr_words] * nr_words`` for topic ``top_id``."""
    concentration = [beta / nr_words] * nr_words
    return s_rng.dirichlet(concentration)


# For each word in the document, draw a topic according to the
# document-specific prior.
# TODO: this should be a multinomial over doc_mixture(doc_id) with
# draw_shape=(nr,); a binomial on the first component is used for now.
@memoized
def topics(doc_id, nr):
    """Return ``nr`` binomial(1, doc_mixture(doc_id)[0]) draws."""
    first_component = doc_mixture(doc_id)[0]
    return s_rng.binomial(1, first_component, draw_shape=(nr, ))


# Draw words for a specific topic.
def word_topic(top_id):
    """Return a multinomial(1, topic_mixture(top_id)) draw."""
    return s_rng.multinomial(1, topic_mixture(top_id))

# TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map
# a vector of topics to individual multinomials with as priors the different topics. In the case of two topics
コード例 #6
0
import numpy, pylab
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import evaluate
from rv import full_log_likelihood

s_rng = RandomStreams(23424)

# Probability given to the binomial indicator that selects the fixed
# weight 0.5 below.
fair_prior = 0.999

# Coin weight: 0.5 where the indicator draw exceeds 0.5, otherwise the first
# component of a Dirichlet draw with alpha = [1, 1].
coin_weight = tensor.switch(
    s_rng.binomial(1, fair_prior) > 0.5, 0.5,
    s_rng.dirichlet([1, 1])[0])


def make_coin(p, size):
    """Return ``size`` binomial(1, p) draws from the shared random stream."""
    return s_rng.binomial(1, p, draw_shape=(size, ))


def coin(size):
    """Return ``size`` flips of the coin whose weight is ``coin_weight``."""
    return make_coin(coin_weight, size)


for size in [1, 3, 6, 10, 20, 30, 50, 70, 100]:
    # Observations come from a coin with weight 0.9.
    # NOTE(review): evaluate presumably turns the symbolic draw into concrete
    # values -- confirm in for_theano.
    data = evaluate(make_coin(0.9, size))

    # Condition a fresh set of `size` flips on the observations and sample
    # the coin weight.
    sampler = mh2_sample(s_rng, [coin_weight], {coin(size): data})

    estimate = sampler(nr_samples=400, burnin=20000, lag=10)[0].mean()

    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # produces the same text as the former Python 2 print statement.
    print("nr of examples %s , estimated probability %s" % (size, estimate))
コード例 #7
0
import numpy, pylab
import theano
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import memoized

s_rng = RandomStreams(23424)

# Shared prototype: a Dirichlet draw over five categories scaled by a
# gamma(2., 2.) draw.
phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1]))
alpha = s_rng.gamma(2., 2.)
prototype = phi * alpha


@memoized
def bag_prototype(bag):
    """Return a Dirichlet draw parameterised by the shared ``prototype``."""
    return s_rng.dirichlet(prototype)


def draw_marbles(bag, nr):
    """Return ``nr`` multinomial(1, bag_prototype(bag)) draws."""
    return s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))


# Observations. Each literal is 5 x 6 and is transposed to 6 x 5, so every
# row of the result is a one-hot vector over the five categories.
marbles_bag_1 = numpy.asarray(
    [[1, 1, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0]],
    dtype=theano.config.floatX).T
marbles_bag_2 = numpy.asarray(
    [[0, 0, 0, 0, 0, 0],
     [1, 1, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0]],
    dtype=theano.config.floatX).T
marbles_bag_3 = numpy.asarray(
    [[0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 0, 0, 0, 0],
     [1, 1, 1, 1, 1, 1],
     [0, 0, 0, 0, 0, 0]],
    dtype=theano.config.floatX).T
コード例 #8
0
import numpy, pylab
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import evaluate
from rv import full_log_likelihood

s_rng = RandomStreams(23424)

# Success probability of the binomial indicator that chooses the fixed
# weight 0.5 in the switch below.
fair_prior = 0.999

# The coin weight is 0.5 where the indicator draw is greater than 0.5;
# otherwise it is the first component of a Dirichlet([1, 1]) draw.
coin_weight = tensor.switch(
    s_rng.binomial(1, fair_prior) > 0.5,
    0.5,
    s_rng.dirichlet([1, 1])[0])


def make_coin(p, size):
    """Return ``size`` binomial(1, p) draws."""
    return s_rng.binomial(1, p, draw_shape=(size,))


def coin(size):
    """Return ``size`` draws from a coin weighted by ``coin_weight``."""
    return make_coin(coin_weight, size)


for size in [1, 3, 6, 10, 20, 30, 50, 70, 100]:
    # Generate `size` observations from a coin with weight 0.9.
    # NOTE(review): evaluate is assumed to return concrete values for the
    # symbolic draw -- confirm in for_theano.
    data = evaluate(make_coin(0.9, size))

    sampler = mh2_sample(s_rng, [coin_weight], {coin(size): data})

    weight_mean = sampler(nr_samples=400, burnin=20000, lag=10)[0].mean()

    # One pre-formatted string keeps the output identical to the old
    # Python 2 print statement while also being valid Python 3.
    print("nr of examples %s , estimated probability %s"
          % (size, weight_mean))
コード例 #9
0
# Observed draws for bags 2 and 3. Each literal is 5 x 6 and is transposed to
# 6 x 5, so every row of the result is a one-hot vector over five categories
# (category index 1 for bag 2, index 3 for bag 3).
marbles_bag_2 = numpy.asarray([[0,0,0,0,0,0],
                               [1,1,1,1,1,1],
                               [0,0,0,0,0,0],
                               [0,0,0,0,0,0],
                               [0,0,0,0,0,0]], dtype=theano.config.floatX).T 
marbles_bag_3 = numpy.asarray([[0,0,0,0,0,0],
                               [0,0,0,0,0,0],
                               [0,0,0,0,0,0],
                               [1,1,1,1,1,1],
                               [0,0,0,0,0,0]], dtype=theano.config.floatX).T 
# Single observation for bag 4: a 1 x 5 one-hot row with the 1 in the last
# position.
marbles_bag_4 = numpy.asarray([[0],[0],[0],[0],[1]], dtype=theano.config.floatX).T 



# Define flat model
# Each bag gets a memoized Dirichlet draw with concentration [5, 5, 5, 5, 5].
bag_prototype =  memoized(lambda bag: s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])*5))
# nr multinomial(1, bag_prototype(bag)) draws; this lambda is not memoized.
draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))

# Generate samples from the model
# Condition the draws of each bag on its observed array.
# NOTE(review): marbles_bag_1 is not defined in the visible part of this
# script -- it must come from earlier in the file.
givens = {draw_marbles(1,6): marbles_bag_1,
            draw_marbles(2,6): marbles_bag_2,
            draw_marbles(3,6): marbles_bag_3,
            draw_marbles(4,1): marbles_bag_4}
            
# NOTE(review): this draw_marbles(4,1) call is separate from the one used as a
# key in givens; presumably it is a new, unobserved draw from bag 4 -- confirm.
sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens)            

# NOTE(review): positional arguments; presumably (nr_samples, burnin, lag) --
# confirm against mh2_sample.
samples = sampler(200, 100, 100)
data = samples[0]

# Show histogram
pylab.subplot(211)
コード例 #10
0
import numpy, pylab
import theano
from theano import tensor
from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from rv import full_log_likelihood

s_rng = RandomStreams(3424)

# Latent variables of a two-component mixture:
#   p  -- first component of a Dirichlet([1, 1]) draw
#   m1 -- uniform on [-5, 5]
#   m2 -- uniform on [-5, 5]
#   v  -- uniform on [0, 1], passed as the second argument of the normal
p = s_rng.dirichlet(numpy.asarray([1, 1]))[0]
m1 = s_rng.uniform(low=-5, high=5)
m2 = s_rng.uniform(low=-5, high=5)
v = s_rng.uniform(low=0, high=1)

# One binary indicator per data point selects m1 (nonzero) or m2 (zero) as
# the first argument of the normal.
C = s_rng.binomial(1, p, draw_shape=(4, ))
m = tensor.switch(C, m1, m2)
D = s_rng.normal(m, v, draw_shape=(4, ))

# The four observed values D is conditioned on.
D_data = numpy.asarray([1, 1.2, 3, 3.4], dtype=theano.config.floatX)

givens = {D: D_data}
sampler = mh2_sample(s_rng, [p, m1, m2, v], givens)

# NOTE(review): arguments are positional; presumably
# (nr_samples, burnin, lag) -- confirm against mh2_sample.
samples = sampler(200, 1000, 100)

# Report the mean of each of the four sampled variables, space-separated.
# The single-argument print(...) form runs on Python 2 and Python 3 alike and
# matches the output of the former Python 2 print statement.
means = [samples[i].mean() for i in range(4)]
print(" ".join(str(value) for value in means))