import numpy
import theano
from theano import tensor

# Helpers such as ancestors, is_raw_rv, is_randomstate, lpdf, typed_items,
# clone_keep_replacements, infer_shape, evaluate, and evaluate_with_assignments
# are assumed to be provided elsewhere in this package.


def likelihood_gradient(observations={}, learning_rate=0.1):
    """
    Return (params, updates, log_likelihood) for gradient ascent on the
    log likelihood of the observations.
    """
    all_vars = ancestors(list(observations.keys()))

    for o in observations:
        assert o in all_vars
        if not is_raw_rv(o):
            raise TypeError(o)

    RVs = [v for v in all_vars if is_raw_rv(v)]
    free_RVs = [v for v in RVs if v not in observations]

    # Instantiate actual values for the different random variables:
    params = dict()
    for v in free_RVs:
        f = theano.function([], v,
                mode=theano.Mode(linker='py', optimizer=None))
        params[v] = theano.shared(f())

    # Compute the full log likelihood:
    full_observations = dict(observations)
    full_observations.update(params)
    log_likelihood = full_log_likelihood(full_observations)

    # Construct the update equations for learning (one ascent step per call):
    updates = dict()
    for frvs in params.values():
        updates[frvs] = frvs + learning_rate * tensor.grad(log_likelihood,
                frvs)

    return params, updates, log_likelihood
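# Hedged usage sketch (illustrative, not from the original source): fitting a
# latent mean by gradient ascent. `RandomStreams` stands in for whatever
# random-stream constructor this package provides; the data are made up.
#
#     s_rng = RandomStreams(23)
#     mu = s_rng.normal(0, 10)                  # latent mean, broad prior
#     x = s_rng.normal(mu, 1)                   # observed variable
#     params, updates, log_likelihood = likelihood_gradient(
#             observations={x: numpy.asarray([0.5, 1.2, 0.8])})
#     step = theano.function([], log_likelihood, updates=updates)
#     for i in range(100):                      # each call is one ascent step
#         step()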
def all_raw_rvs(outputs):
    """
    Return a list of all random variables required to compute `outputs`.
    """
    all_vars = ancestors(outputs)
    assert outputs[0] in all_vars
    rval = [v for v in all_vars if is_raw_rv(v)]
    return rval
def is_rv(var, blockers=None):
    """
    Return True iff `var` is a random variable.

    A random variable is a variable with a RandomState object among its
    ancestors.
    """
    # TODO: could optimize by stopping the recursion as soon as a RandomState
    #       is found
    return any(is_randomstate(v) for v in ancestors([var], blockers=blockers))
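# Hedged illustration of is_rv vs. is_raw_rv (same hypothetical `s_rng` as
# above): a raw RV is the direct output of a sampling Op, whereas any
# expression downstream of one is random without being raw.
#
#     mu = s_rng.normal(0, 1)       # is_raw_rv(mu) and is_rv(mu) are True
#     y = mu + 5                    # is_rv(y) is True, is_raw_rv(y) is False
#     all_raw_rvs([y])              # -> [mu]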
def mh_sample(s_rng, outputs, observations={}):
    """
    Return Metropolis-Hastings samples for `outputs`, implemented as a
    theano.scan loop that re-proposes all free random variables jointly.
    """
    all_vars = ancestors(list(outputs) + list(observations.keys()))

    for o in observations:
        assert o in all_vars
        if not is_raw_rv(o):
            raise TypeError(o)

    RVs = [v for v in all_vars if is_raw_rv(v)]
    free_RVs = [v for v in RVs if v not in observations]

    # Draw an initial state for each free random variable:
    free_RVs_state = []
    for v in free_RVs:
        f = theano.function([], v,
                mode=theano.Mode(linker='py', optimizer=None))
        free_RVs_state.append(theano.shared(f()))

    log_likelihood = theano.shared(numpy.array(float('-inf')))

    U = s_rng.uniform(low=0.0, high=1.0)

    def mcmc(ll, *frvs):
        proposals = [s_rng.local_proposal(v, rvs)
                for v, rvs in zip(free_RVs, frvs)]
        proposals_rev = [s_rng.local_proposal(v, rvs)
                for v, rvs in zip(free_RVs, proposals)]

        full_observations = dict(observations)
        full_observations.update(
                dict([(rv, s) for rv, s in zip(free_RVs, proposals)]))
        new_log_likelihood = full_log_likelihood(full_observations)

        # Acceptance ratio, including forward and reverse proposal densities:
        logratio = new_log_likelihood - ll \
                + tensor.add(*[tensor.sum(lpdf(p, r))
                    for p, r in zip(proposals_rev, frvs)]) \
                - tensor.add(*[tensor.sum(lpdf(p, r))
                    for p, r in zip(proposals, proposals)])

        accept = tensor.gt(logratio, tensor.log(U))

        return [tensor.switch(accept, new_log_likelihood, ll)] + \
                [tensor.switch(accept, p, f)
                    for p, f in zip(proposals, frvs)], \
                {}, theano.scan_module.until(accept)

    samples, updates = theano.scan(mcmc,
            outputs_info=[log_likelihood] + free_RVs_state,
            n_steps=100)

    updates[log_likelihood] = samples[0][-1]
    updates.update(
            dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])]))

    return ([free_RVs_state[free_RVs.index(out)] for out in outputs],
            log_likelihood, updates)
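# Hedged usage sketch for mh_sample (same hypothetical model as above): the
# returned updates advance the scan-based chain each time the compiled
# function is called.
#
#     outs, ll, updates = mh_sample(s_rng, [mu],
#             observations={x: numpy.asarray([0.5, 1.2, 0.8])})
#     chain_step = theano.function([], outs, updates=updates)
#     trace = [chain_step() for _ in range(1000)]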
def condition(rvs, observations):
    if len(rvs) > 1:
        raise NotImplementedError()

    observations = typed_items(observations)

    # If none of the rvs show up in the ancestors of any observations,
    # then this is easy conditioning.
    obs_ancestors = ancestors(observations.keys(), blockers=rvs)
    if any(rv in obs_ancestors for rv in rvs):
        # Not-so-easy conditioning: we need to produce a sampler-driven model.
        raise NotImplementedError()
    else:
        # Easy conditioning: substitute the observed values into the graph.
        rvs_anc = ancestors(rvs, blockers=observations.keys())
        frontier = [r for r in rvs_anc
                if r.owner is None or r in observations.keys()]
        cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, rvs,
                replacements=observations)
        return cloned_outputs
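# Hedged sketch of the "easy" branch of condition(): when the conditioned RV
# is not an ancestor of any observation, the observed values are simply
# substituted into the graph, so no sampler is needed.
#
#     mu = s_rng.normal(0, 1)
#     x = s_rng.normal(mu, 1)
#     x_given_mu, = condition([x], {mu: 0.5})   # x_given_mu ~ N(0.5, 1)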
def full_log_likelihood(assignment):
    """
    Return the log probability log(P(rv0=val0, rv1=val1, ...)) of a full
    assignment.

    assignment: a dict {rv0: val0, rv1: val1, ...}.

    Each of val0, val1, ... is assumed to represent the same number of draws
    from its distribution; this function returns the summed log density over
    all of those draws.

    The output of this function may itself be a random variable, if not all
    sources of randomness are removed by the assignment.
    """
    for rv in assignment.keys():
        if not is_rv(rv):
            raise ValueError('non-random var in assignment key', rv)

    # All random variables that are not assigned should stay the same object
    # so they can be replaced later; otherwise they get cloned.
    RVs = [v for v in ancestors(assignment.keys()) if is_raw_rv(v)]
    for rv in RVs:
        if rv not in assignment:
            assignment[rv] = rv

    # Cast assignment values to the right kind of thing:
    assignment = typed_items(assignment)

    pdfs = [lpdf(rv, sample) for rv, sample in assignment.items()]
    lik = tensor.add(*[tensor.sum(p) for p in pdfs])

    dfs_variables = ancestors([lik], blockers=assignment.keys())
    frontier = [r for r in dfs_variables
            if r.owner is None or r in assignment.keys()]
    cloned_inputs, cloned_outputs = clone_keep_replacements(frontier, [lik],
            replacements=assignment)
    cloned_lik, = cloned_outputs
    return cloned_lik
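# Hedged usage sketch for full_log_likelihood: assigning a value to every raw
# RV removes all randomness, so the result compiles to a concrete scalar:
# log p(mu=0) + sum_i log p(x_i | mu=0). Values are made up.
#
#     mu = s_rng.normal(0, 1)
#     x = s_rng.normal(mu, 1, draw_shape=(2,))
#     ll = full_log_likelihood({mu: 0.0, x: numpy.asarray([0.1, -0.3])})
#     theano.function([], ll)()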
def hybridmc_sample(s_rng, outputs, observations={}):
    """
    Return a dictionary mapping random variables to their sample values.
    """
    # TODO: should there be a size variable here?
    # TODO: implement lag and burn-in
    # TODO: implement size
    all_vars = ancestors(list(outputs) + list(observations.keys()))

    for o in observations:
        assert o in all_vars
        if not is_raw_rv(o):
            raise TypeError(o)

    RVs = [v for v in all_vars if is_raw_rv(v)]
    free_RVs = [v for v in RVs if v not in observations]

    free_RVs_state = [theano.shared(
            numpy.ones(shape=infer_shape(v)),
            broadcastable=tuple(numpy.asarray(infer_shape(v)) == 1))
            for v in free_RVs]
    free_RVs_prop = [s_rng.normal(0, 1, draw_shape=infer_shape(v))
            for v in free_RVs]

    log_likelihood = theano.shared(numpy.array(float('-inf')))

    U = s_rng.uniform(low=0, high=1.0)

    epsilon = numpy.sqrt(2 * 0.03)

    def mcmc(ll, *frvs):
        full_observations = dict(observations)
        full_observations.update(
                dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))
        loglik = -full_log_likelihood(full_observations)

        proposals = free_RVs_prop
        # H = kinetic energy of the momenta plus the potential energy:
        H = tensor.add(*[tensor.sum(tensor.sqr(p))
                for p in proposals]) / 2. + loglik

        # -- this should be an inner loop
        g = []
        g.append(tensor.grad(loglik, frvs))

        proposals = [(p - epsilon * gg[0] / 2.)
                for p, gg in zip(proposals, g)]

        rvsp = [(rvs + epsilon * rvp) for rvs, rvp in zip(frvs, proposals)]

        full_observations = dict(observations)
        full_observations.update(
                dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
        new_loglik = -full_log_likelihood(full_observations)

        gnew = []
        gnew.append(tensor.grad(new_loglik, rvsp))
        proposals = [(p - epsilon * gn[0] / 2.)
                for p, gn in zip(proposals, gnew)]
        # --

        Hnew = tensor.add(*[tensor.sum(tensor.sqr(p))
                for p in proposals]) / 2. + new_loglik

        dH = Hnew - H
        accept = tensor.or_(dH < 0., U < tensor.exp(-dH))

        return [tensor.switch(accept, -new_loglik, ll)] + \
                [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
                {}, theano.scan_module.until(accept)

    samples, updates = theano.scan(mcmc,
            outputs_info=[log_likelihood] + free_RVs_state,
            n_steps=10000000)

    updates[log_likelihood] = samples[0][-1]
    updates.update(
            dict([(f, s[-1]) for f, s in zip(free_RVs_state, samples[1:])]))

    return ([free_RVs_state[free_RVs.index(out)] for out in outputs],
            log_likelihood, updates)
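# Hedged usage sketch for hybridmc_sample (hybrid / Hamiltonian Monte Carlo):
# inside mcmc(), sum(sqr(p))/2 is the kinetic energy of the auxiliary momenta
# and the negated log likelihood is the potential, so dH drives a standard
# Metropolis correction. Usage mirrors mh_sample:
#
#     outs, ll, updates = hybridmc_sample(s_rng, [mu],
#             observations={x: numpy.asarray([0.5, 1.2, 0.8])})
#     hmc_step = theano.function([], outs, updates=updates)
#     trace = [hmc_step() for _ in range(500)]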
def mh2_sample(s_rng, outputs, observations={}, givens={}):
    all_vars = ancestors(list(observations.keys()) + list(outputs))

    for o in observations:
        assert o in all_vars
        if not is_raw_rv(o):
            raise TypeError(o)

    RVs = [v for v in all_vars if is_raw_rv(v)]
    free_RVs = [v for v in RVs if v not in observations]

    free_RVs_state = []
    for v in free_RVs:
        f = theano.function([], v,
                mode=theano.Mode(linker='py', optimizer=None))
        free_RVs_state.append(theano.shared(f()))

    U = s_rng.uniform(low=0.0, high=1.0)

    rr = []
    for index in range(len(free_RVs)):
        # TODO: why does the compiler crash when we try to expose the
        #       likelihood?
        full_observations = dict(observations)
        full_observations.update(
                dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
        log_likelihood = full_log_likelihood(full_observations)

        proposal = s_rng.local_proposal(free_RVs[index],
                free_RVs_state[index])
        proposal_rev = s_rng.local_proposal(free_RVs[index], proposal)

        full_observations = dict(observations)
        full_observations.update(
                dict([(rv, s) for rv, s in zip(free_RVs, free_RVs_state)]))
        full_observations.update(dict([(free_RVs[index], proposal)]))
        new_log_likelihood = full_log_likelihood(full_observations)

        bw = tensor.sum(lpdf(proposal_rev, free_RVs_state[index]))
        fw = tensor.sum(lpdf(proposal, proposal))

        lr = new_log_likelihood - log_likelihood + bw - fw
        accept = tensor.gt(lr, tensor.log(U))

        updates = {free_RVs_state[index]:
                tensor.switch(accept, proposal, free_RVs_state[index])}
        rr.append(theano.function([], [accept],
                updates=updates, givens=givens))

    # TODO: the exact number of samples returned is still wrong
    def sampler(nr_samples, burnin=100, lag=100):
        data = [[] for o in outputs]
        for i in range(nr_samples * lag + burnin):
            accept = False
            while not accept:
                index = numpy.random.randint(len(free_RVs))
                accept = rr[index]()

            if accept and i > burnin and (i - burnin) % lag == 0:
                for d, o in zip(data, outputs):
                    # TODO: this can be optimized
                    if is_raw_rv(o):
                        d.append(
                            free_RVs_state[free_RVs.index(o)].get_value())
                    else:
                        full_observations = dict(observations)
                        full_observations.update(
                                dict([(rv, s) for rv, s
                                    in zip(free_RVs, free_RVs_state)]))
                        d.append(evaluate(
                                evaluate_with_assignments(
                                    o, full_observations),
                                givens=givens))

        data = [numpy.asarray(d).squeeze() for d in data]
        return data

    return sampler
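# Hedged usage sketch for mh2_sample: unlike mh_sample, this returns a Python
# closure that drives the compiled single-site updates from the host side.
#
#     sampler = mh2_sample(s_rng, [mu],
#             observations={x: numpy.asarray([0.5, 1.2, 0.8])})
#     draws, = sampler(nr_samples=200, burnin=100, lag=10)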