def gaussianTheano(xo, yo, amplitude, sigma_x, sigma_y):
    """Build a rotated 2D Gaussian as a Theano expression.

    Note: x and y are assumed to be coordinate grids defined at module
    scope, and the rotation angle theta and the offset are fixed to
    zero for now.
    """
    #xo = float(xo)
    #yo = float(yo)
    theta = offset = 0  # for now
    a = (pm.cos(theta)**2) / (2 * sigma_x**2) + (pm.sin(theta)**2) / (2 * sigma_y**2)
    b = -(pm.sin(2 * theta)) / (4 * sigma_x**2) + (pm.sin(2 * theta)) / (4 * sigma_y**2)
    c = (pm.sin(theta)**2) / (2 * sigma_x**2) + (pm.cos(theta)**2) / (2 * sigma_y**2)
    gauss = offset + amplitude * pm.exp(-1 * (a * ((x - xo)**2)
                                              + 2 * b * (x - xo) * (y - yo)
                                              + c * ((y - yo)**2)))
    return gauss
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the width of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model():
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.T('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1],
                          progressbar=False, njobs=2)
    return trace
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the width of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.StudentT('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)
        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1],
                          progressbar=False)
    return model, trace
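# A minimal usage sketch for model_stoch_vol, assuming pm and sp are
# imported as in the function above; the synthetic T-distributed
# returns here are purely illustrative:
import numpy as np
import pandas as pd

np.random.seed(42)
fake_returns = pd.Series(0.01 * np.random.standard_t(df=5, size=400))
model, trace = model_stoch_vol(fake_returns, samples=500)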
def createSignalModelExponential(data):
    """Toy model that treats the first ~10% of the waveform as an exponential.

    Does a good job of finding the start time (t_0). Since I made this as a
    toy, it's super brittle. Waveform must be normalized.
    """
    with Model() as signal_model:
        switchpoint = Uniform('switchpoint', lower=0, upper=len(data),
                              testval=len(data) / 2)
        noise_sigma = HalfNormal('noise_sigma', sd=1.)

        # Modeling these parameters this way is why the waveform needs to be
        # normalized.
        exp_rate = Uniform('exp_rate', lower=0, upper=.5, testval=0.05)
        exp_scale = Uniform('exp_scale', lower=0, upper=.5, testval=0.1)

        timestamp = np.arange(0, len(data), dtype=np.float)

        # Rate is zero before the switchpoint, exp_rate after it.
        rate = switch(switchpoint >= timestamp, 0, exp_rate)
        baseline_model = Deterministic(
            'baseline_model',
            exp_scale * (exp((timestamp - switchpoint) * rate) - 1.))
        baseline_observed = Normal('baseline_observed', mu=baseline_model,
                                   sd=noise_sigma, observed=data)
    return signal_model
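# A minimal usage sketch, assuming a normalized waveform array wf and
# the same star-imported PyMC3 names (Model, sample, ...) used inside
# the function above:
signal_model = createSignalModelExponential(wf)
with signal_model:
    trace = sample(2000)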
y = nbinom.rvs(mu, 0.5)

with pm.Model() as model:
    # Define priors
    alpha = pm.Uniform('alpha', 0, 100)
    sigma_a = pm.Uniform('sigma_a', 0, 10)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # Priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),           # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)                   # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood (named y_obs so the observed data y is not shadowed)
    y_obs = pm.NegativeBinomial('y', mu=pm.exp(eta), alpha=alpha,
                                observed=y)

    # Fit
    start = pm.find_MAP()        # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(7000, step, start=start)

# Print summary to screen
pm.summary(trace)
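# The snippet above (and the binomial and Poisson variants later in this
# section) assumes N, NGroups, x1, x2, Groups, and the linear predictor
# behind mu were defined earlier. A hypothetical setup, for illustration
# only:
import numpy as np
from scipy.stats import nbinom

N = 1000
NGroups = 10
x1 = np.random.uniform(size=N)
x2 = np.random.uniform(size=N)
Groups = np.random.randint(0, NGroups, size=N)
a = np.random.normal(0, 0.5, size=NGroups)   # true random intercepts
eta = 1.0 + 2.0 * x1 - 1.5 * x2 + a[Groups]  # true linear predictor
mu = np.exp(eta)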
import matplotlib.pyplot as plt
from plot_post import plot_post

# THE DATA.
N = 30
z = 8
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)           # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood (named y_obs so the observed data y is not shadowed)
    y_obs = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    step1 = pm.Metropolis(model.vars[1:])
    step2 = pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1])
    trace = pm.sample(10000, [step1, step2], start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 1000
thin = 5

## Print summary for each trace
print("N: ",N) # initial prior # both D and A have mean 1 and std 10 alpha_A=400.0/16.0 beta_A=1.0/16.0 alpha_N=400.0/16.0 beta_N=1.0/16.0 alpha_D=2.0+1.0/1.6 beta_D=100*(alpha_D-1) delta_t=0.802 with pm.Model() as model: D = pm.InverseGamma('D', alpha=alpha_D, beta=beta_D) A = pm.Gamma('A', alpha=alpha_A, beta=beta_A) B = pm.Deterministic('B', pm.exp(-delta_t * D / A)) path = lcm.Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=time_series) start = pm.find_MAP(fmin=sp.optimize.fmin_powell) trace = pm.sample(100000, start=start) pm.summary(trace) data_dict={ 'D':trace['D'], 'A':trace['A'], 'B':trace['B'], } df=pd.DataFrame(data_dict)
# $$ y \sim \textrm{Normal}(\textrm{exp}(x), 2)$$
# $$ z \sim \textrm{Normal}(x + y, 0.75)$$
#
# The aim here is to get posteriors over $x$ and $y$ given the data we have about $z$ (`zdata`).
#
# We create a new `Model` object and do operations within its context. The `with` statement lets PyMC know that this model is the current model of interest.
#
# We construct new random variables by calling the constructor for their prior distribution, such as `Normal`, while within a model context (inside the `with`). When you make a random variable, it is automatically added to the model. The constructor returns a Theano variable.
#
# The constructor lets you specify the name of the random variable, the parameters of its prior distribution, and the shape of the random variable. We can mark a random variable as observed by passing in the data that was observed.

# In[3]:

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1)
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1))  # here, shape is telling us it's a vector rather than a scalar.
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata)  # shape is inferred from zdata

# A parenthetical note on the parameters for the normal: variance is encoded as `tau`, denoting precision, which is simply inverse variance (so $\tau = \sigma^{-2}$). This is used because the gamma distribution is the conjugate prior for precision, and must be inverted to get variance. Encoding in terms of precision saves the inversion step in cases where variance is actually modeled with a gamma prior.

# Fit Model
# ---------
# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori (MAP) point, which is often a good choice of starting point. `find_MAP` uses an optimization algorithm (`scipy.optimize.fmin_l_bfgs_b`, or [BFGS](http://en.wikipedia.org/wiki/BFGS_method), by default) to find the local maximum of the log posterior.
#
# Note that this `with` construction is used again. Functions like `find_MAP` and `HamiltonianMC` need to have a model in their context. `with` activates the context of a particular model within its block.

# In[4]:

with model:
    start = pm.find_MAP()
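# A quick illustration of the tau/sd relationship described above: the
# two declarations below specify the same prior (a sketch; the names a
# and b are only for illustration).
import pymc3 as pm

with pm.Model():
    a = pm.Normal('a', mu=0., sd=2.)     # standard deviation of 2
    b = pm.Normal('b', mu=0., tau=0.25)  # precision tau = 2.**-2 = 0.25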
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

returns = pd.read_csv("https://raw.githubusercontent.com/pymc-devs/pymc3/master/pymc3/examples/data/SP500.csv",
                      header=None, parse_dates=True)[2500:2900]
returns.columns = ['S&P500']  # name the column so the model can refer to it
#plt.style.use('ggplot')
#returns.plot(figsize=(12,7), c="b")
#plt.show()

from pymc3 import Exponential, T, exp, Deterministic, Model, sample, NUTS, find_MAP, traceplot
from pymc3.distributions.timeseries import GaussianRandomWalk

with Model() as sp500_model:
    nu = Exponential('nu', 1./10, testval=5.)
    sigma = Exponential('sigma', 1./.02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2*s))
    r = T('r', nu, lam=1/volatility_process, observed=returns['S&P500'])
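# The snippet imports NUTS, find_MAP, and sample but stops after
# building the model; a sampling continuation in the style of the SP500
# variants later in this section might look like:
import scipy.optimize

with sp500_model:
    start = find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)
    step = NUTS(scaling=start)
    trace = sample(1000, step, start=start, progressbar=False)

traceplot(trace, [nu, sigma])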
# We create a new `Model` object and do operations within its context. The `with` statement lets PyMC know that this model is the current model of interest.
#
# We construct new random variables by calling the constructor for their prior distribution, such as `Normal`, while within a model context (inside the `with`). When you make a random variable, it is automatically added to the model. The constructor returns a Theano variable.
#
# The constructor lets you specify the name of the random variable, the
# parameters of its prior distribution, and the shape of the random
# variable. We can mark a random variable as observed by passing in the
# data that was observed.

# In[3]:

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1)
    # here, shape is telling us it's a vector rather than a scalar.
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1))
    # shape is inferred from zdata
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata)

# A parenthetical note on the parameters for the normal: variance is
# encoded as `tau`, denoting precision, which is simply inverse variance
# (so $\tau = \sigma^{-2}$). This is used because the gamma distribution
# is the conjugate prior for precision, and must be inverted to get
# variance. Encoding in terms of precision saves the inversion step in
# cases where variance is actually modeled with a gamma prior.

# Fit Model
# ---------
# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori (MAP) point, which is often a good choice of starting point. `find_MAP` uses an optimization algorithm (`scipy.optimize.fmin_l_bfgs_b`, or [BFGS](http://en.wikipedia.org/wiki/BFGS_method), by default) to find the local maximum of the log posterior.
#
# Note that this `with` construction is used again. Functions like
# `find_MAP` and `HamiltonianMC` need to have a model in their context.
# `with` activates the context of a particular model within its block.
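# In[4]:

# The fit step itself, as shown in the fuller variant of this notebook above:
with model:
    start = pm.find_MAP()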
mu = 1.0 / (1.0 + np.exp(-eta))
y = bernoulli.rvs(mu, size=N)

with pm.Model() as model:
    # Define priors
    sigma = pm.Uniform('sigma', 0, 100)
    sigma_a = pm.Uniform('sigma_a', 0, 10)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # Priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),           # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)                   # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood (named y_obs so the observed data y is not shadowed)
    y_obs = pm.Normal('y', mu=1.0 / (1.0 + pm.exp(-eta)), sd=sigma,
                      observed=y)

    # Fit
    start = pm.find_MAP()        # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(7000, step, start=start)

# Print summary to screen
pm.summary(trace)
print("N: ", N) # initial prior # both D and A have mean 1 and std 10 alpha_A = 400.0 / 16.0 beta_A = 1.0 / 16.0 alpha_N = 400.0 / 16.0 beta_N = 1.0 / 16.0 alpha_D = 2.0 + 1.0 / 1.6 beta_D = 100 * (alpha_D - 1) delta_t = 0.802 with pm.Model() as model: D = pm.InverseGamma('D', alpha=alpha_D, beta=beta_D) A = pm.Gamma('A', alpha=alpha_A, beta=beta_A) B = pm.Deterministic('B', pm.exp(-delta_t * D / A)) path = lcm.Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=time_series) start = pm.find_MAP(fmin=sp.optimize.fmin_powell) trace = pm.sample(100000, start=start) pm.summary(trace) data_dict = { 'D': trace['D'], 'A': trace['A'], 'B': trace['B'], }
mu = np.exp(eta)
y = poisson.rvs(mu, size=N)

with pm.Model() as model:
    # Define priors
    sigma_a = pm.Uniform('sigma_a', 0, 100)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # Priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),           # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)                   # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood (named y_obs so the observed data y is not shadowed)
    y_obs = pm.Poisson('y', mu=pm.exp(eta), observed=y)

    # Fit
    start = pm.find_MAP()        # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(20000, step, start=start, progressbar=False)

# Print summary to screen
pm.summary(trace)
#print(len(returns))
n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %')

with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1. / 10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1. / .02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pymc3.Deterministic('volatility_process',
                                             pymc3.exp(-2 * s))
    r = pymc3.StudentT('r', nu, lam=1 / volatility_process, observed=returns)

with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)

    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)

pymc3.traceplot(trace, [nu, sigma])
#returns = pd.read_csv('SP500.csv')
#print(len(returns))
n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %');

with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1./10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1./.02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pymc3.Deterministic('volatility_process',
                                             pymc3.exp(-2*s))
    r = pymc3.StudentT('r', nu, lam=1/volatility_process, observed=returns)

with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)

    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)
import matplotlib.pyplot as plt
from plot_post import plot_post

# THE DATA.
N = 30
z = 8
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)           # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood (named y_obs so the observed data y is not shadowed)
    y_obs = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    step1 = pm.Metropolis(model.vars[1:])
    step2 = pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1])
    trace = pm.sample(10000, [step1, step2], start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 1000
thin = 5
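# A sketch of turning the sampled model index into posterior model
# probabilities, using the trace, burnin, and thin values defined above
# (the variable names below are illustrative):
model_idx_sample = trace['model_index'][burnin::thin]
p_model0 = (model_idx_sample == 0).mean()
p_model1 = (model_idx_sample == 1).mean()
print('P(model 0 | data) = %.3f' % p_model0)
print('P(model 1 | data) = %.3f' % p_model1)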