def dgei(self, nres=None, qres=128, gres=128, pgb=None):
    """Direct grid evaluation of the joint over the parameters (n, q, g).

    Builds the parameter RVs, evaluates the model log-likelihood on the
    grid, forms the likelihood product over the data, and returns the
    result of ``set_joint(prior * likelihood)``.

    Args:
      nres: number of grid points for n (defaults to the upper n-limit).
      qres: grid resolution for q.
      gres: grid resolution for g.
      pgb: optional progress handle passed through to the model.
    """
    nres = nres or self.nlims[1]
    # Integer grid for n, linearly spaced across its limits
    nset = np.array(np.round(
        np.linspace(self.nlims[0], self.nlims[1], nres)), dtype=int).tolist()
    self.n = pb.RV('n', nset, vtype=int, pscale='log')
    self.n.set_prob(lambda x: 1. / x)  # reciprocal prior on n
    self.q = pb.RV('q', self.qlims, vtype=float, pscale='log')
    self.g = pb.RV('g', self.glims, vtype=float, pscale='log')
    self.q.set_mfun((np.log, np.exp,))
    self.g.set_mfun((np.log, np.exp,))
    self.paras = pb.RJ(self.n, self.q, self.g)
    self.model = pb.RF(self.stats, self.paras)
    self.model.set_prob(lqlf, eps=self.eps, pgb=pgb)
    # {qres}/{gres} set literals: probayes convention for grid resolutions
    call = {'x,i': self.xi, 'n': np.array(nset), 'q': {qres}, 'g': {gres}}
    ll = self.model(call)
    # Fix: np.complex was removed in NumPy 1.24; it was an alias for the
    # builtin complex, so this is behaviour-preserving on older NumPy too.
    self.llx = ll.rescaled(complex(np.mean(ll.prob)))
    I = np.ravel(self.llx.vals['i'])
    u = np.unique(I).tolist()
    self.lli = [self.llx({'i': i}, keepdims=True) for i in u]
    # Product over the data dimensions for each unique index
    # (renamed loop variable so it no longer shadows ll above)
    self.ll = [llxi.prod(['x', 'i']) for llxi in self.lli]
    ll = tuple(self.ll)
    self.likelihood = pb.product(*ll)
    vals = self.likelihood.ret_cond_vals()
    self.prior = self.paras(vals)
    return self.set_joint(self.prior * self.likelihood)
def test_dists(dist_scipy, args_scipy, dist_sympy, args_sympy, vtype, vset, values):
    """Check that a scipy- and a sympy-specified RV yield matching probabilities."""
    # Scipy-specified RV: arguments may arrive packed as a dict or a sequence
    rv_scipy = pb.RV('x', vtype=vtype, vset=vset)
    if isinstance(args_scipy, dict):
        rv_scipy.set_prob(dist_scipy, **args_scipy)
    else:
        rv_scipy.set_prob(dist_scipy, *args_scipy)
    prob_scipy = rv_scipy({rv_scipy: values})

    # Sympy-specified RV: the distribution is bound symbolically via y[:]
    rv_sympy = pb.RV('y', vtype=vtype, vset=vset)
    rv_sympy.set_prob(dist_sympy(rv_sympy[:], *args_sympy))
    prob_sympy = rv_sympy({rv_sympy: values})

    assert np.allclose(prob_scipy.prob, prob_sympy.prob), \
        "Inconsistent results comparing {} vs {}".format(
            dist_scipy, dist_sympy)
def set_data(self, data, eps=None):
    """Store the observed data and derive eps, polarity and q-limits.

    Args:
      data: a single array or a list of arrays of observations.
      eps: optional scale constant; when None it is estimated from the
           non-negative tail of the data (1.253 ~ sqrt(pi/2)).
    """
    self.eps = eps
    self.data = data if isinstance(data, list) else [data]
    self.means = np.array([np.mean(subdata) for subdata in self.data])
    self.num = len(self.means)
    # Overall sign of the pooled data
    self.polarity = 1. if np.mean(np.hstack(data)) >= 0. else -1.
    if self.eps is None:
        # NOTE(review): both branches of this conditional are identical
        # (subdata >= 0. in each); the negative-polarity branch was
        # probably meant to differ — confirm intended behaviour.
        tails = [subdata[subdata >= 0.] for subdata in self.data] if self.polarity > 0.\
            else [subdata[subdata >= 0.] for subdata in self.data]
        tails = np.hstack(tails)
        if not len(tails):
            # No tail samples: fall back to a unit eps
            self.eps = 1.
        else:
            self.eps = 1.253 * np.mean(tails)
    # Work with polarity-corrected (non-negative-mean) statistics
    means = self.polarity * self.means
    plims = self.plims
    nqlims = [np.min(means), np.max(means)]
    # Widen the n*q limits if the mean spread is narrower than plims allows
    if np.max(means) / np.min(means) < np.max(plims) / np.min(plims):
        nqlo = np.max(means) / np.max(plims)
        nqhi = np.min(means) / np.min(plims)
        nqlims = [np.minimum(nqlo, nqhi), np.maximum(nqlo, nqhi)]
    # q-limits follow from dividing the n*q limits by the n-limits
    self.qlims = [
        np.min(nqlims) / np.max(self.nlims),
        np.max(nqlims) / np.min(self.nlims)
    ]
    self.x = pb.RV('x', (-np.inf, np.inf), vtype=float)
    self.i = pb.RV('i', [0, self.num], vtype=int)
    self.stats = pb.RJ(self.x, self.i)
    # Flattened (value, dataset-index) pairs across all datasets
    self.xi = [[], []]
    for i, subdata in enumerate(self.data):
        self.xi[0].append(self.polarity * subdata)
        self.xi[1].append(np.tile(i, len(subdata)))
    self.xi[0] = np.hstack(self.xi[0])
    self.xi[1] = np.hstack(self.xi[1])
# Sampling program to test simple 1-D sampling
import probayes as pb

num_samples = 10
sample_range = [0., 1.]

x = pb.RV('x', vtype=float, vset=sample_range)
p = pb.SP(x)

# Method one: pull samples one at a time from an open-ended sampler
sampler_0 = p.sampler()
samples = []
for _ in range(num_samples):
    samples.append(next(sampler_0))
print("Number of samples: {}".format(len(samples)))

# Method two: a bounded sampler can be consumed in a single pass
sampler_1 = p.sampler(num_samples)
samples = list(sampler_1)
print("Number of samples: {}".format(len(samples)))
from pylab import *
ion()

# Settings
rand_size = 60
rand_mean = 50.
rand_stdv = 10.
mu_lims = (40, 60)
sigma_lims = (5, 20.)
# {n} set literals: probayes convention for an evaluation resolution of n points
resolution = {'mu': {128}, 'sigma': {192}}

# Generate data
data = np.random.normal(loc=rand_mean, scale=rand_stdv, size=rand_size)

# Declare RVs
# NOTE(review): 'pb' and 'scipy' are used below but not imported in this
# visible chunk — presumably imported elsewhere in the file.
mu = pb.RV('mu', vtype=float, vset=mu_lims)
sigma = pb.RV('sigma', vtype=float, vset=sigma_lims)
x = pb.RV('x', vtype=float, vset={-np.inf, np.inf})

# Set reciprocal prior for sigma
sigma.set_ufun((np.log, np.exp))

# Set up params and models
paras = pb.RF(mu, sigma)
stats = pb.RF(x)
model = pb.SD(stats, paras)
# NOTE(review): chunk is truncated — this call continues beyond the
# visible source.
model.set_prob(scipy.stats.norm.logpdf, order={
    'x': 0, 'mu': 'loc', 'sigma': 'scale'
import numpy as np
from pylab import *
ion()

# Prob convention: successor dimension (row) > predecessor dimension (col)
tran = np.array([0., 0., .5, 1., .5, 0., 0., .5, .5],).reshape([3, 3])
n_sims = 2000
m_steps = 12  # max_steps

# Analytical solution (obtained from the eigenvalues of tran)
m = np.arange(1, m_steps + 1)
mpi_2 = m * np.pi / 2
hatp = 0.2 + 0.5**m * (0.8 * np.cos(mpi_2) - 0.4 * np.sin(mpi_2))

# Simulation
x = pb.RV('x', range(3))
x.set_tran(tran)
X = pb.SP(x)
cond = [None] * n_sims
succ = np.empty([n_sims, m_steps], dtype=int)
print('Simulating...')
for i in range(n_sims):
    # Fix: removed dead `cond[i] = [None] * m_steps` — it was overwritten
    # unconditionally by the assignment below on every iteration.
    sampler = X.sampler({'x': 0}, stop=m_steps)
    samples = X.walk(sampler)
    summary = X(samples)
    cond[i] = summary.q.prob
    succ[i] = summary.q["x'"]
print('...done')

# Observed probability of state 0 at each step, across simulations
obsp = np.sum(succ == 0, axis=0) / n_sims
import numpy as np
"""
Consider a disease with a prevalence 1\% in a given population. Of those
with the disease, 98\% manifest a particular symptom, that is present only
in 10\% of those without the disease. What is the probability someone with
symptoms has the disease?

Answer: \approx 9%
"""
# PARAMETERS
prevalence = 0.01
sym_if_dis = 0.98
sym_if_undis = 0.1

# SET UP RANDOM VARIABLES
# NOTE(review): 'pb' (probayes) is used but not imported in this visible chunk.
dis = pb.RV('dis', prob=prevalence)  # prior P(dis=True) = prevalence
sym = pb.RV('sym')

# SET UP STOCHASTIC CONDITION
# 2x2 table P(sym|dis): row 0 = sym False, row 1 = sym True;
# col 0 = dis False, col 1 = dis True
sym_given_dis = sym | dis
sym_given_dis.set_prob(np.array([1-sym_if_undis, 1-sym_if_dis, \
    sym_if_undis, sym_if_dis]).reshape((2,2)))

# APPLY BAYES' RULE
p_dis = dis()                              # prior
p_sym_given_dis = sym_given_dis()          # likelihood
p_dis_and_sym = p_dis * p_sym_given_dis    # joint
p_sym = p_dis_and_sym.marginal('sym')      # evidence
p_dis_given_sym = p_dis_and_sym / p_sym    # posterior
inference = p_dis_given_sym({'dis': True, 'sym': True})
print(inference)
ion()
import probayes as pb

# PARAMETERS
radius = 1.
n_steps = 6000
cols = {False: 'r', True: 'b'}  # plot colours keyed by inside/outside

# SETUP CIRCLE FUNCTION AND RVs
def inside(x, y):
    # True for points within (or on) the circle of given radius at the origin
    return x**2 + y**2 <= radius**2

xy_range = (-radius, radius)
x = pb.RV("x", xy_range)
y = pb.RV("y", xy_range)

# DEFINE RANDOM FIELD
xy = x & y
xy.set_delta((0.15 * radius, ), bound=True)
xy.set_prop(inside, order={'x': None, 'y': None, "x'": 0, "y'": 1})

steps = [None] * n_steps
pred = [None] * n_steps
succ = [None] * n_steps
cond = np.empty(n_steps, dtype=float)
print('Simulating...')
for i in range(n_steps):
    if i == 0:
        # NOTE(review): chunk is truncated — the loop body continues
        # beyond the visible source.
        steps[i] = xy.step({0})
# NOTE(review): this chunk starts mid-function — the return below is the
# tail of a transition density function defined above the visible source.
    return scipy.stats.norm.pdf(succ, loc=loc, scale=scale)

def tcdf(succ, pred):
    # CDF of the transition density; loc/scale depend on the predecessor
    loc = -np.sin(pred)
    scale = 1. + 0.5 * np.cos(pred)
    return scipy.stats.norm.cdf(succ, loc=loc, scale=scale)

def ticdf(succ, pred):
    # Inverse CDF (percent-point function) matching tcdf above
    loc = -np.sin(pred)
    scale = 1. + 0.5 * np.cos(pred)
    return scipy.stats.norm.ppf(succ, loc=loc, scale=scale)

x = pb.RV('x', set_lims)
x.set_tran(tran, order={'x': 'pred', "x'": 'succ'})
x.set_tfun((tcdf, ticdf), order={'x': 'pred', "x'": 'succ'})

steps = [None] * n_steps
pred = np.empty(n_steps, dtype=float)
succ = np.empty(n_steps, dtype=float)
cond = np.empty(n_steps, dtype=float)
print('Simulating...')
for i in range(n_steps):
    if i == 0:
        steps[i] = x.step({0})
    else:
        # Each step conditions on the previous successor state
        steps[i] = x.step(succ[i - 1])
    pred[i] = steps[i]['x']
    succ[i] = steps[i]["x'"]
""" Example of a RV with an implicit probability distribution defined according to a functional transformation: if: x' ~ uniform f(x) = arcsin(sqrt(x')) then x ~ arcsine distirbution """ import probayes as pb import sympy from pylab import * ion() x = pb.RV('x', vtype=float, vset=(0, 1)) x.set_ufun(sympy.asin(sympy.sqrt(x[:])), no_ucov=True) fx = x({200}) x.set_ufun(sympy.asin(sympy.sqrt(x[:])), no_ucov=False) rx = x({-10000}) figure() subplot(2, 1, 1) plot(fx['x'], fx.prob) xlabel('x') ylabel('Prob / density') gca().set_ylim(0., 1.02 * max(fx.prob)) title("{}".format(x.prob)) subplot(2, 1, 2) hist(rx['x'], 50) xlabel('x (random samples)')
# Example of sampling from a normal probability density function
import scipy.stats
from pylab import *
ion()
import probayes as pb

norm_range = {-2., 2.}
set_size = {-10000}  # size negation denotes random sampling

# Standard normal RV restricted to the given range
x = pb.RV("x", norm_range, prob=scipy.stats.norm, loc=0, scale=1)

# Draw the samples and show their distribution
draws = x.evaluate(set_size)
hist(draws['x'], 100)
# Example of a 1D normal probability density function
import scipy.stats
from pylab import *; ion()
import probayes as pb

support = [-3., 3.]
resolution = {100}

# Standard normal held on a log probability scale
rv = pb.RV("norm", support, prob=scipy.stats.norm, pscale='log',
           loc=0, scale=1)
log_dist = rv(resolution)
dist = log_dist.rescaled()  # back to linear probabilities for plotting

figure()
plot(dist['norm'], dist.prob)
radius = 1.
set_size = {-10000}  # size negation denotes random sampling

# SETUP CIRCLE FUNCTION AND RVs
def inside(x, y):
    # Indicator (as float) of points within the circle of given radius
    return np.array(x**2 + y**2 <= radius**2, dtype=float)

def norm2d(x, y, loc=0., scale=radius):
    # Separable 2-D normal density used as the proposal
    return scipy.stats.norm.pdf(x, loc=loc, scale=scale) * \
        scipy.stats.norm.pdf(y, loc=loc, scale=scale)

xy_range = [-radius, radius]
x = pb.RV("x", xy_range)
y = pb.RV("y", xy_range)

# DEFINE STOCHASTIC CONDITION
xy = x & y
xy.set_prob(inside)

# DEFINE PROPOSAL DENSITY AND COEFFICIENT VARIABLE
xy.set_prop(norm2d)
# NOTE(review): evaluated at (radius, 1.); for the proposal's maximum one
# would expect (0., 0.) — confirm this choice is intentional.
coef_max = float(norm2d(radius, 1.))
coef = pb.RV('coef', {0., coef_max})
coefs = coef(set_size)
p_prop = xy.propose({(x, y): set_size}, suffix=False)
# Acceptance thresholds: random coefficient times the proposal density
thresholds = coefs['coef'] * p_prop.prob

# CALL TARGET DENSITY AND APPLY REJECTION SAMPLING
from pylab import *; ion() # PARAMETERS rand_size = 60 rand_mean = 50. rand_stdv = 10. n_steps = 5000 step_size = (0.005,) mu_lims = (40, 60) sigma_lims = (5, 20.) # SIMULATE DATA x_obs = np.random.normal(loc=rand_mean, scale=rand_stdv, size=rand_size) # SET UP MODEL AND SAMPLER mu = pb.RV('mu', vtype=float, vset=mu_lims, pscale='log') sigma = pb.RV('sigma', vtype=float, vset=sigma_lims, pscale='log') x = pb.RV('x', vtype=float, vset=(-np.inf, np.inf)) sigma.set_ufun((np.log, np.exp)) paras = pb.RF(mu, sigma) stats = pb.RF(x) process = pb.SP(stats, paras) process.set_prob(scipy.stats.norm.logpdf, order={'x':0, 'mu':'loc', 'sigma':'scale'}) tran = lambda **x: 1. paras.set_tran((tran, tran)) paras.set_delta(step_size, scale=True) process.set_tran(paras) process.set_delta(paras) process.set_scores('hastings') process.set_update('metropolis')
# Example of a 3D normal probability density function with covariance
import scipy.stats
import probayes as pb

set_lims = (-3., 3.)
resolutions = ({200}, {300}, {400})  # per-axis evaluation resolutions
means = [0.5, 0., -0.5]
covar = [[2., 0.3, -0.3], [0.3, 1., -0.5], [-0.3, -0.5, 0.5]]

# One log-scaled RV per axis over a common support
x, y, z = (pb.RV(key, set_lims, pscale='log') for key in ("x", "y", "z"))
xyz = x & y & z
xyz.set_prob(scipy.stats.multivariate_normal, mean=means, cov=covar)

# Evaluate the joint log-density and rescale to linear probabilities
pxyz = xyz({'x': resolutions[0], 'y': resolutions[1], 'z': resolutions[2]})
p_xyz = pxyz.rescaled()
pmf = p_xyz.prob[:-1, :-1, :-1]
# Example of a 2D multivariate random walk import scipy.stats from pylab import * ion() import probayes as pb set_lims = (-10., 10.) nsteps = 2000 means = [0.5, -0.5] covar = [[1.5, -1.0], [-1.0, 2.]] x = pb.RV('x', vtype=float, vset=set_lims) y = pb.RV('y', vtype=float, vset=set_lims) xy = x & y xy.set_prob(scipy.stats.multivariate_normal, means, covar) xy.set_tran(scipy.stats.multivariate_normal, means, covar) x_t = np.empty(nsteps, dtype=float) y_t = np.empty(nsteps, dtype=float) p_t = np.empty(nsteps, dtype=float) for i in range(nsteps): if i == 0: cond = xy.step({'x': 0., 'y': 0.}, {0}) else: cond = xy.step({'x': x_t[i - 1], 'y': y_t[i - 1]}, {0}) x_t[i], y_t[i] = cond["x'"], cond["y'"] p_xy = xy({'x': x_t[i], 'y': y_t[i]}) p_t[i] = p_xy.prob xy_t = np.array([x_t, y_t]) amn_xy = np.mean(xy_t, axis=1) cov_xy = np.cov(xy_t)
ion()
from mpl_toolkits.mplot3d import Axes3D  # import needed for 3D projection

n_steps = 1000

# Simulate data: noisy observations around a straight line
rand_size = 60
x_range = [-3, 3]
slope = 1.5
intercept = -1.
y_noise = 0.5
x_obs = np.random.normal(0, 1, size=rand_size)
y_obs = np.random.normal(slope * x_obs + intercept, y_noise)

# Set up RVs, RFs, and SP
x = pb.RV('x', vtype=float, vset=x_range)
y = pb.RV('y', vtype=float, vset=[-np.inf, np.inf])
beta_0 = pb.RV('beta_0', vtype=float, vset=[-6., 6.])
beta_1 = pb.RV('beta_1', vtype=float, vset=[-6., 6.])
# NOTE(review): (0.001) is just the scalar 0.001 — if an exclusive bound
# was intended, the tuple notation (0.001,) may have been meant; confirm.
y_sigma = pb.RV('y_sigma', vtype=float, vset=[(0.001), 10.])

# Define likelihood and conditional functions
def norm_reg(x, y, beta_0, beta_1, y_sigma):
    # Gaussian log-likelihood of y about the regression line
    return scipy.stats.norm.logpdf(y, loc=beta_0 + beta_1 * x, scale=y_sigma)

# NOTE(review): chunk is truncated — this definition continues beyond the
# visible source.
def cond_reg(x, y, beta_0, beta_1,
# Remarginalisation example import collections import probayes as pb import numpy as np from pylab import *; ion() num_samples = 500 num_resamples = 50 x = pb.RV('x', vtype=float, vset=[0, 1]) y = pb.RV('y', vtype=float, vset=[0, 1]) xy = x & y p_xy = xy({num_samples}) xpy = np.linspace(-0.001, 2.001, num_resamples) xmy = np.linspace(-1.001, 1.001, num_resamples) distribution_vals = collections.OrderedDict({'p': xpy, 'm': xmy}) distribution = pb.Distribution('p,m', distribution_vals) mapping = {'p': p_xy['x'] + p_xy['y'], 'm': p_xy['x'] - p_xy['y']} p_pm = p_xy.remarginalise(distribution, mapping) pmf = p_pm.prob[:-1, :-1] figure() pcolor(np.ravel(p_pm['m']), np.ravel(p_pm['p']), pmf, cmap=cm.jet) colorbar()
# Simulation settings
sim_size = 1000  # Number of observations to simulate

# Simulate data: z is True with a probability that depends on x and y
x_obs = np.random.choice([False, True], size=sim_size)
y_obs = np.random.choice([False, True], size=sim_size)
z_prob = 0.1 + 0.2 * x_obs + 0.4 * y_obs
z_obs = np.array(
    [np.random.choice([False, True], p=[1 - z_p, z_p]) for z_p in z_prob])

# Pair z column-wise with each predictor
zx_obs = np.column_stack([z_obs, x_obs])
zy_obs = np.column_stack([z_obs, y_obs])
zx_lhood, zx_rfreq = pb.bool_perm_freq(zx_obs, ['z', 'x'])
zy_lhood, zy_rfreq = pb.bool_perm_freq(zy_obs, ['z', 'y'])

# Naive Bayes
x = pb.RV('x', vtype=bool)
y = pb.RV('y', vtype=bool)
z = pb.RV('z', vtype=bool)
zx = z | x
zy = z | y
zx.set_prob(zx_lhood, passdims=True)
zy.set_prob(zy_lhood, passdims=True)
zxy = pb.SD(zx, zy)
p_zxy = zxy()
p_zxy_false = zxy({'x': False, 'y': False})
p_zxy_true = zxy({'x': True, 'y': True})
import scipy.stats
from pylab import *
ion()

n_steps = 12288
prop_stdv = np.sqrt(1)

def q(**kwds):
    # Gaussian proposal density q(x', y' | x, y), separable in x and y
    x, xprime = kwds['x'], kwds["x'"]
    y, yprime = kwds['y'], kwds["y'"]
    return scipy.stats.norm.pdf(yprime, loc=y, scale=prop_stdv) * \
        scipy.stats.norm.pdf(xprime, loc=x, scale=prop_stdv)

x = pb.RV('x', vtype=float, vset=(-np.inf, np.inf))
y = pb.RV('y', vtype=float, vset=(-np.inf, np.inf))
process = pb.SP(x & y)
# Target: correlated bivariate normal
process.set_prob(scipy.stats.multivariate_normal, [0., 0.],
                 [[2.0, 1.2], [1.2, 2.0]])
process.set_tran(q)
# Additive Gaussian step drawn independently per coordinate
lambda_delta = lambda: process.Delta(
    x=scipy.stats.norm.rvs(loc=0., scale=prop_stdv),
    y=scipy.stats.norm.rvs(loc=0., scale=prop_stdv))
process.set_delta(lambda_delta)
# Metropolis-Hastings acceptance
process.set_scores('hastings')
process.set_update('metropolis')
sampler = process.sampler({'x': 0., 'y': 1.}, stop=n_steps)
samples = [sample for sample in sampler]
summary = process(samples)
# Count of accepted proposals
n_accept = summary.u.count(True)
""" Example of normally distributed variable specified using sympy.stats """ import sympy import sympy.stats import probayes as pb from pylab import * ion() x = pb.RV('x', vtype=float, vset=[-2, 2]) x.set_prob(sympy.stats.Normal(x[:], mean=0, std=1), pscale='log') fx = x({1000}).rescaled() rx = x({-1000}) figure() subplot(2, 1, 1) plot(fx['x'], fx.prob) xlabel('x') ylabel('Prob / density') gca().set_ylim(0., 1.02 * max(fx.prob)) title("{}".format(x.prob)) subplot(2, 1, 2) hist(rx['x'], 50) xlabel('x (random samples)') ylabel('Freq / counts')
# Example of a joint PMF for two coins
import probayes as pb

coin_0 = pb.RV('c0', prob=0.7)
coin_1 = pb.RV('c1', prob=0.4)
joint = coin_0 & coin_1
HH = joint()

# Slices of the joint at fixed outcomes
m0 = HH({'c0': True})
m1 = HH({'c1': True})
m2 = HH({'c0': True, 'c1': True})

# Marginals and conditionals of the joint PMF
M0 = HH.marginal('c0')
M1 = HH.marginal('c1')
C0 = HH.conditionalise('c0')
C1 = HH.conditionalise('c1')

# Print each distribution alongside its probabilities, in the same order
for dist in (HH, m0, m1, m2, M0, M1, C0, C1):
    print((dist, dist.prob))
import probayes as pb
from pylab import *; ion()

# Settings
rand_size = 60
rand_mean = 50.
rand_stdv = 10.
mu_lims = (40, 60)
sigma_lims = (5, 20.)
resolution = {'mu': {128}, 'sigma': {192}}  # {n}: evaluation resolutions

# Generate data
data = np.random.normal(loc=rand_mean, scale=rand_stdv, size=rand_size)

# Declare RVs
# NOTE(review): 'sympy' is used below but not imported in this visible chunk.
mu = pb.RV('mu', vtype=float, vset=mu_lims)
sigma = pb.RV('sigma', vtype=float, vset=sigma_lims)
x = pb.RV('x', vtype=float, vset={-pb.OO, pb.OO})

# Set reciprocal prior for sigma
sigma.set_ufun(sympy.log(sigma[:]))

# Set up params and models
paras = pb.RF(mu, sigma)
stats = pb.RF(x)
model = pb.SD(stats, paras)
model.set_prob(sympy.stats.Normal(x[:], mean=mu[:], std=sigma[:]),
               pscale='log')

# Evaluate log probabilities
joint = model({x: data, **resolution}, iid=True, joint=True)