def test_multinomial():
    """Check the shape of a multinomial sample drawn with ``draw_shape=(2,)``.

    A length-5 vector of equal probabilities is passed to
    ``RandomStreams.multinomial``; the compiled function must return an
    array of shape ``(2, 5)``.
    """
    uniform_pvals = numpy.ones(5) / 5
    R = RandomStreams(234)
    n = R.multinomial(5, uniform_pvals, draw_shape=(2,))
    sample = theano.function([], n)()
    # One row per requested draw, one column per entry of the probability vector.
    assert sample.shape == (2, 5)
def test_multinomial():
    """Multinomial sampling with ``draw_shape=(2,)`` over 5 equal weights.

    Compiles the random variable into a no-argument Theano function and
    asserts that evaluating it gives an array of shape ``(2, 5)``.
    """
    rng = RandomStreams(234)
    counts = rng.multinomial(5, numpy.ones(5) / 5, draw_shape=(2,))
    draw = theano.function([], counts)
    assert draw().shape == (2, 5)
nr_topics = 2 alpha = 0.8 beta = 1. # Topic distribution per document doc_mixture = memoized(lambda doc_id: s_rng.dirichlet([alpha/nr_topics]*nr_topics)) # Word distribution per topic topic_mixture = memoized(lambda top_id: s_rng.dirichlet([beta/nr_words]*nr_words)) # For each word in the document, draw a topic according to multinomial with document specific prior # TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,))) topics = memoized(lambda doc_id, nr: s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))) # Draw words for a specific topic word_topic = lambda top_id: s_rng.multinomial(1, topic_mixture(top_id)) # TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map # a vector of topics to individual multinomials with as priors the different topics. In the case of two topics # we can hack around this by using a binomial topic distribution and using a switch statement here: word_topic_mapper = lambda top_id: tensor.switch(top_id, word_topic(0), word_topic(1)) # Maps topics to words # TODO, see comment above: get_words = memoized(lambda doc_id, nr: theano.map(word_topic, topics(doc_id, nr))[0]) get_words = memoized(lambda doc_id, nr: theano.map(word_topic_mapper, topics(doc_id, nr))[0]) # Define training 'documents' document_1 = numpy.asarray([[1,0,0,0], [1,0,0,0], [0,1,0,0], [1,0,0,0],
# Topic distribution per document doc_mixture = memoized( lambda doc_id: s_rng.dirichlet([alpha / nr_topics] * nr_topics)) # Word distribution per topic topic_mixture = memoized( lambda top_id: s_rng.dirichlet([beta / nr_words] * nr_words)) # For each word in the document, draw a topic according to multinomial with document specific prior # TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,))) topics = memoized(lambda doc_id, nr: s_rng.binomial( 1, doc_mixture(doc_id)[0], draw_shape=(nr, ))) # Draw words for a specific topic word_topic = lambda top_id: s_rng.multinomial(1, topic_mixture(top_id)) # TODO: memoized only works on the pre-compiled graph. This makes it fail in the case where we have to map # a vector of topics to individual multinomials with as priors the different topics. In the case of two topics # we can hack around this by using a binomial topic distribution and using a switch statement here: word_topic_mapper = lambda top_id: tensor.switch(top_id, word_topic(0), word_topic(1)) # Maps topics to words # TODO, see comment above: get_words = memoized(lambda doc_id, nr: theano.map(word_topic, topics(doc_id, nr))[0]) get_words = memoized( lambda doc_id, nr: theano.map(word_topic_mapper, topics(doc_id, nr))[0]) # Define training 'documents' document_1 = numpy.asarray( [[1, 0, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0],
import numpy
import pylab
import theano

from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import memoized

s_rng = RandomStreams(23424)

# Shared hyper-parameters: a Dirichlet-distributed direction scaled by a
# Gamma-distributed magnitude forms the Dirichlet parameter of every bag.
phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1]))
alpha = s_rng.gamma(2., 2.)
prototype = phi * alpha


@memoized
def bag_prototype(bag):
    """Colour distribution of `bag`, drawn from the shared prototype."""
    return s_rng.dirichlet(prototype)


def draw_marbles(bag, nr):
    """Draw `nr` single-marble multinomial samples from `bag`."""
    return s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))


def _one_colour_draws(colour, nr_draws, nr_colours=5):
    """Return a (nr_draws, nr_colours) array whose column `colour` is all ones.

    The array is built row-per-colour and then transposed, exactly like the
    hand-written literals it replaces.
    """
    rows = [[1 if c == colour else 0] * nr_draws for c in range(nr_colours)]
    return numpy.asarray(rows, dtype=theano.config.floatX).T


# Observed draws: six marbles per bag, all of a single colour.
marbles_bag_1 = _one_colour_draws(0, 6)
marbles_bag_2 = _one_colour_draws(1, 6)
marbles_bag_3 = _one_colour_draws(3, 6)
[1,1,1,1,1,1], [0,0,0,0,0,0], [0,0,0,0,0,0], [0,0,0,0,0,0]], dtype=theano.config.floatX).T marbles_bag_3 = numpy.asarray([[0,0,0,0,0,0], [0,0,0,0,0,0], [0,0,0,0,0,0], [1,1,1,1,1,1], [0,0,0,0,0,0]], dtype=theano.config.floatX).T marbles_bag_4 = numpy.asarray([[0],[0],[0],[0],[1]], dtype=theano.config.floatX).T # Define flat model bag_prototype = memoized(lambda bag: s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1])*5)) draw_marbles = lambda bag, nr: s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,)) # Generate samples from the model givens = {draw_marbles(1,6): marbles_bag_1, draw_marbles(2,6): marbles_bag_2, draw_marbles(3,6): marbles_bag_3, draw_marbles(4,1): marbles_bag_4} sampler = mh2_sample(s_rng, [draw_marbles(4,1)], givens) samples = sampler(200, 100, 100) data = samples[0] # Show histogram pylab.subplot(211) pylab.bar(range(5), data.sum(axis=0))