def setUp(self):
    """Build the random streams and per-state models used by the tests.

    Sets ``self.s_rng``, ``self.nr_states``, ``self.nr_obs``, the two
    memoized Dirichlet models and the two multinomial samplers, then
    attempts to sample a sequence with ``scan``.
    """
    s_rng = self.s_rng = RandomStreams(23424)

    self.nr_states = 5
    self.nr_obs = 3

    # One Dirichlet draw per state for the observation and transition
    # parameters (all concentration parameters are 1).
    # NOTE(review): `memoized` presumably caches per `state` so repeated
    # calls reuse the same draw -- confirm against its definition.
    self.observation_model = memoized(
        lambda state: s_rng.dirichlet([1] * self.nr_obs))
    self.transition_model = memoized(
        lambda state: s_rng.dirichlet([1] * self.nr_states))

    # Fixed: this used to read `self.tranisition_model`, an attribute that is
    # never assigned, so calling `self.transition` raised AttributeError.
    self.transition = lambda state: s_rng.multinomial(
        1, self.transition_model(state))
    self.observation = lambda state: s_rng.multinomial(
        1, self.observation_model(state))

    def transition(obs, state):
        # Returns (outputs, updates, stop condition); the condition compares
        # `state` with the one-hot vector for index 4.
        # NOTE(review): this looks like the Theano scan step convention --
        # confirm against the `scan`/`until` actually imported by this file.
        return ([self.observation(state), self.transition(state)],
                {},
                until(state == numpy.asarray([0, 0, 0, 0, 1])))

    # NOTE(review): `obs` and `state` are not bound inside this function and
    # the local `transition` step is never handed to `scan`; unless they are
    # module-level names this call raises NameError. Left as written because
    # the intended initial values cannot be determined from here.
    [self.sampled_words, self.sampled_states], updates = scan([], [obs, state])
def setUp(self):
    """Build the random streams and per-state models used by the tests.

    Sets ``self.s_rng``, ``self.nr_states``, ``self.nr_obs``, the two
    memoized Dirichlet models and the two multinomial samplers, then
    attempts to sample a sequence with ``scan``.
    """
    s_rng = self.s_rng = RandomStreams(23424)

    self.nr_states = 5
    self.nr_obs = 3

    # One Dirichlet draw per state for the observation and transition
    # parameters (all concentration parameters are 1).
    # NOTE(review): `memoized` presumably caches per `state` so repeated
    # calls reuse the same draw -- confirm against its definition.
    self.observation_model = memoized(
        lambda state: s_rng.dirichlet([1] * self.nr_obs))
    self.transition_model = memoized(
        lambda state: s_rng.dirichlet([1] * self.nr_states))

    # Fixed: this used to read `self.tranisition_model`, an attribute that is
    # never assigned, so calling `self.transition` raised AttributeError.
    self.transition = lambda state: s_rng.multinomial(
        1, self.transition_model(state))
    self.observation = lambda state: s_rng.multinomial(
        1, self.observation_model(state))

    def transition(obs, state):
        # Returns (outputs, updates, stop condition); the condition compares
        # `state` with the one-hot vector for index 4.
        # NOTE(review): this looks like the Theano scan step convention --
        # confirm against the `scan`/`until` actually imported by this file.
        return ([self.observation(state), self.transition(state)],
                {},
                until(state == numpy.asarray([0, 0, 0, 0, 1])))

    # NOTE(review): `obs` and `state` are not bound inside this function and
    # the local `transition` step is never handed to `scan`; unless they are
    # module-level names this call raises NameError. Left as written because
    # the intended initial values cannot be determined from here.
    [self.sampled_words, self.sampled_states], updates = scan([], [obs, state])
import theano
from theano import tensor

from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

s_rng = RandomStreams(123)

nr_words = 4
nr_topics = 2
alpha = 0.8
beta = 1.


# Topic distribution per document: a Dirichlet over `nr_topics` entries, each
# with concentration alpha / nr_topics.
@memoized
def doc_mixture(doc_id):
    return s_rng.dirichlet([alpha / nr_topics] * nr_topics)


# Word distribution per topic: a Dirichlet over `nr_words` entries, each with
# concentration beta / nr_words.
@memoized
def topic_mixture(top_id):
    return s_rng.dirichlet([beta / nr_words] * nr_words)


# For each word in the document, draw a topic according to multinomial with
# document specific prior
# TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,)))
@memoized
def topics(doc_id, nr):
    # Binomial stand-in for the multinomial above: the success probability is
    # the first component of the document's topic mixture.
    return s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))


def word_topic(top_id):
    """Draw words for a specific topic."""
    return s_rng.multinomial(1, topic_mixture(top_id))


# TODO: memoized only works on the pre-compiled graph. This makes it fail in
# the case where we have to map a vector of topics to individual multinomials
# with as priors the different topics. In the case of two topics we can hack
# around this by using a binomial topic distribution and using a switch
# statement here:
def word_topic_mapper(top_id):
    """Select between the two topics' word draws with ``tensor.switch``."""
    # Created in this order (topic 0, then topic 1) to match the original
    # argument evaluation order.
    when_set = word_topic(0)
    when_unset = word_topic(1)
    return tensor.switch(top_id, when_set, when_unset)
import theano
from theano import tensor

from rstreams import RandomStreams
import distributions
from sample import mh2_sample, mh_sample
from for_theano import memoized, evaluate

s_rng = RandomStreams(123)

nr_words = 4
nr_topics = 2
alpha = 0.8
beta = 1.


# Topic distribution per document: a Dirichlet over `nr_topics` entries, each
# with concentration alpha / nr_topics.
@memoized
def doc_mixture(doc_id):
    return s_rng.dirichlet([alpha / nr_topics] * nr_topics)


# Word distribution per topic: a Dirichlet over `nr_words` entries, each with
# concentration beta / nr_words.
@memoized
def topic_mixture(top_id):
    return s_rng.dirichlet([beta / nr_words] * nr_words)


# For each word in the document, draw a topic according to multinomial with
# document specific prior
# TODO, see comment below: topics = memoized(lambda doc_id, nr: s_rng.multinomial(1, doc_mixture[doc_id], draw_shape=(nr,)))
@memoized
def topics(doc_id, nr):
    # Binomial stand-in for the multinomial above: the success probability is
    # the first component of the document's topic mixture.
    return s_rng.binomial(1, doc_mixture(doc_id)[0], draw_shape=(nr,))


def word_topic(top_id):
    """Draw words for a specific topic."""
    return s_rng.multinomial(1, topic_mixture(top_id))


# TODO: memoized only works on the pre-compiled graph. This makes it fail in
# the case where we have to map a vector of topics to individual multinomials
# with as priors the different topics. In the case of two topics, `topics`
# above draws from a binomial instead of the multinomial.
import numpy
import pylab
import theano

from rstreams import RandomStreams
import distributions
from sample import mh2_sample
from for_theano import memoized

s_rng = RandomStreams(23424)

# Shared prototype: a Dirichlet draw over five entries scaled by a Gamma draw.
phi = s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1]))
alpha = s_rng.gamma(2., 2.)
prototype = phi * alpha


# Each bag gets its own Dirichlet draw parameterised by the shared prototype.
@memoized
def bag_prototype(bag):
    return s_rng.dirichlet(prototype)


def draw_marbles(bag, nr):
    """Return `nr` multinomial draws using the prototype of `bag`."""
    return s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))


def _one_hot_draws(colour, nr_draws=6, nr_colours=5):
    # Build the (nr_colours, nr_draws) table first and transpose afterwards,
    # exactly like the literal arrays this replaces, so that the values, dtype
    # and memory layout of the result are unchanged.
    table = [[1 if row == colour else 0] * nr_draws
             for row in range(nr_colours)]
    return numpy.asarray(table, dtype=theano.config.floatX).T


# Observations: six draws per bag, every draw landing on a single index.
marbles_bag_1 = _one_hot_draws(0)
marbles_bag_2 = _one_hot_draws(1)
marbles_bag_3 = _one_hot_draws(3)
def _one_hot_draws(colour, nr_draws=6, nr_colours=5):
    # Build the (nr_colours, nr_draws) table first and transpose afterwards,
    # exactly like the literal arrays this replaces, so that the values, dtype
    # and memory layout of the result are unchanged.
    table = [[1 if row == colour else 0] * nr_draws
             for row in range(nr_colours)]
    return numpy.asarray(table, dtype=theano.config.floatX).T


# Observations: six draws for bags 2 and 3, a single draw for bag 4.
marbles_bag_2 = _one_hot_draws(1)
marbles_bag_3 = _one_hot_draws(3)
marbles_bag_4 = _one_hot_draws(4, nr_draws=1)


# Define flat model
@memoized
def bag_prototype(bag):
    # Every bag uses the same fixed concentration vector (all entries 5).
    return s_rng.dirichlet(numpy.asarray([1, 1, 1, 1, 1]) * 5)


def draw_marbles(bag, nr):
    """Return `nr` multinomial draws using the prototype of `bag`."""
    return s_rng.multinomial(1, bag_prototype(bag), draw_shape=(nr,))


# Generate samples from the model
# The keys are created in the same order as before (bags 1, 2, 3, 4), followed
# by one further `draw_marbles(4, 1)` call for the sampled variable.
givens = {
    draw_marbles(1, 6): marbles_bag_1,
    draw_marbles(2, 6): marbles_bag_2,
    draw_marbles(3, 6): marbles_bag_3,
    draw_marbles(4, 1): marbles_bag_4,
}
target = draw_marbles(4, 1)
sampler = mh2_sample(s_rng, [target], givens)

# NOTE(review): the meaning of the three sampler arguments is defined by
# `mh2_sample`, which is outside this view -- confirm before changing them.
samples = sampler(200, 100, 100)
data = samples[0]

# Show histogram
pylab.subplot(211)