Example #1
def gaussianTheano(xo, yo, amplitude, sigma_x, sigma_y):
    # General (rotatable) 2D Gaussian built from PyMC3/Theano ops so the
    # expression stays symbolic. Assumes `import pymc3 as pm` and that the
    # evaluation grid `x`, `y` is defined in the enclosing scope.
    #xo = float(xo)
    #yo = float(yo)
    theta = offset = 0  # no rotation or baseline offset for now
    a = (pm.cos(theta)**2)/(2*sigma_x**2) + (pm.sin(theta)**2)/(2*sigma_y**2)
    b = -(pm.sin(2*theta))/(4*sigma_x**2) + (pm.sin(2*theta))/(4*sigma_y**2)
    c = (pm.sin(theta)**2)/(2*sigma_x**2) + (pm.cos(theta)**2)/(2*sigma_y**2)
    gauss = offset + amplitude*pm.exp(-1*(a*((x-xo)**2) + 2*b*(x-xo)*(y-yo) + c*((y-yo)**2)))
    return gauss
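
# A minimal usage sketch (an assumption, not part of the original source):
# evaluate the Gaussian over a small grid and use it as the mean of an observed
# image inside a model. Assumes `import numpy as np`, `import pymc3 as pm`, and
# that `x`, `y` are the module-level grid arrays the function closes over.
x, y = np.meshgrid(np.arange(10.), np.arange(10.))
data_2d = np.exp(-((x - 5.)**2 + (y - 5.)**2) / 8.)  # stand-in observed image
with pm.Model():
    amplitude = pm.Uniform('amplitude', lower=0., upper=10.)
    mu_img = gaussianTheano(5., 5., amplitude, sigma_x=2., sigma_y=2.)
    pm.Normal('obs', mu=mu_img, sd=.1, observed=data_2d)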
Example #2
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the width of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of the stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model():
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.T('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)

        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples,
                          step,
                          start=trace[-1],
                          progressbar=False,
                          njobs=2)

    return trace
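
# A minimal usage sketch (an assumption, not part of the original source), run
# on a synthetic return series. Assumes module-level `import numpy as np`,
# `import pandas as pd`, `import pymc3 as pm`, and `import scipy as sp` with
# `scipy.optimize` imported.
fake_returns = pd.Series(np.random.normal(0., .01, size=100))  # stand-in data
trace = model_stoch_vol(fake_returns, samples=500)
pm.summary(trace)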
Example #3
def model_stoch_vol(data, samples=2000):
    """Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (the width of the
    T distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of the stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.exp(-2 * s))
        pm.StudentT('r', nu, lam=volatility_process, observed=data)
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)

        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1],
                          progressbar=False)

    return model, trace
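
# Usage sketch for this variant (an assumption, not part of the original
# source): unlike Example #2, it returns the model as well as the trace.
#     model, trace = model_stoch_vol(returns)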
Example #4
def createSignalModelExponential(data):
    """
    Toy model that treats the first ~10% of the waveform as an exponential.
    Does a good job of finding the start time (t_0).
    Since this is a toy model, it is quite brittle: the waveform must be normalized.
    """
    # Assumes `from pymc3 import *` (Model, Uniform, HalfNormal, Normal,
    # Deterministic, switch, exp, sample) and `import numpy as np`.
    with Model() as signal_model:
        switchpoint = Uniform('switchpoint', lower=0, upper=len(data), testval=len(data)/2)

        noise_sigma = HalfNormal('noise_sigma', sd=1.)

        # Modeling these parameters this way is why the waveform needs to be normalized
        exp_rate = Uniform('exp_rate', lower=0, upper=.5, testval=0.05)
        exp_scale = Uniform('exp_scale', lower=0, upper=.5, testval=0.1)

        timestamp = np.arange(0, len(data), dtype=float)

        # Rate is zero before the switchpoint, exp_rate after it
        rate = switch(switchpoint >= timestamp, 0, exp_rate)

        baseline_model = Deterministic('baseline_model',
                                       exp_scale * (exp((timestamp - switchpoint) * rate) - 1.))

        baseline_observed = Normal('baseline_observed', mu=baseline_model,
                                   sd=noise_sigma, observed=data)
    return signal_model
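
# A minimal usage sketch (an assumption, not part of the original source):
# build the model for a stand-in normalized waveform and sample. Assumes
# `from pymc3 import *` and `import numpy as np`, as the function itself does.
wf = np.concatenate([np.zeros(80), .1 * (np.exp(.05 * np.arange(20.)) - 1.)])
signal_model = createSignalModelExponential(wf)
with signal_model:
    trace = sample(500)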
Example #5
# Assumes the (truncated) preceding context defines mu, N, x1, x2, Groups and
# NGroups, and imports numpy as np, pymc3 as pm, and scipy.stats.nbinom.
y = nbinom.rvs(mu, 0.5)

with pm.Model() as model:
    # Define priors
    alpha = pm.Uniform('alpha', 0, 100)
    sigma_a = pm.Uniform('sigma_a', 0, 10)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),  # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)  # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood
    y = pm.NegativeBinomial('y', mu=pm.exp(eta), alpha=alpha, observed=y)

    # Fit
    start = pm.find_MAP()  # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(7000, step, start=start)

# Print summary to screen
pm.summary(trace)
Example #6
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
from plot_post import plot_post

# THE DATA.
N = 30
z = 8
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)  # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood
    y = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    step1 = pm.Metropolis(model.vars[1:])
    step2 = pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1])
    trace = pm.sample(10000, [step1, step2], start=start, progressbar=False)

# EXAMINE THE RESULTS.
burnin = 1000
thin = 5
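
# A plausible follow-up (an assumption, not part of the original source): apply
# the burn-in and thinning, then estimate the posterior probability of each
# model index.
model_idx_sample = trace['model_index'][burnin::thin]
print('p(model_index == 0 | data):', (model_idx_sample == 0).mean())
print('p(model_index == 1 | data):', (model_idx_sample == 1).mean())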

Example #8
# $$ y \sim \textrm{Normal}(\textrm{exp}(x),2)$$
# $$ z \sim \textrm{Normal}(x + y,0.75)$$
#
# The aim here is to get posteriors over $x$ and $y$ given the data we have about $z$ (`zdata`).
#
# We create a new `Model` object, and do operations within its context. The `with` lets PyMC know this model is the current model of interest.
#
# We construct new random variables by calling the constructor for their prior distribution, such as `Normal`, while within a model context (inside the `with`). When you make a random variable it is automatically added to the model. The constructor returns a Theano variable.
#
# Using the constructor, we may specify the name of the random variable, the parameters of its prior distribution, and the shape of the random variable. We can specify that a random variable is observed by supplying the data that was observed.

# In[3]:

with pm.Model() as model:
    x = pm.Normal('x', mu=0., sd=1)
    y = pm.Normal('y', mu=pm.exp(x), sd=2., shape=(ndims, 1)) # here, shape is telling us it's a vector rather than a scalar.
    z = pm.Normal('z', mu=x + y, sd=.75, observed=zdata) # shape is inferred from zdata


# A parenthetical note on the parameters for the normal. Variance can be encoded as `tau`, indicating precision, which is simply inverse variance (so $\tau=\sigma^{-2}$). This parametrization is used because the gamma distribution is the conjugate prior for precision, and must be inverted to get variance. Encoding in terms of precision saves the inversion step in cases where variance is actually modeled using a gamma prior.
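#
# For example (an illustrative aside, not from the original notebook), these two declarations specify the same prior, since $\tau = \sigma^{-2}$:

with pm.Model():
    w1 = pm.Normal('w1', mu=0., tau=4.)  # precision parametrization
    w2 = pm.Normal('w2', mu=0., sd=.5)   # equivalent standard-deviation parametrization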

# Fit Model
# ---------
# We need a starting point for our sampling. The `find_MAP` function finds the maximum a posteriori point (MAP), which is often a good choice for a starting point. `find_MAP` uses an optimization algorithm (`scipy.optimize.fmin_l_bfgs_b`, or [BFGS](http://en.wikipedia.org/wiki/BFGS_method), by default) to find the local maximum of the log posterior.
#
# Note that this `with` construction is used again. Functions like `find_MAP` and `HamiltonianMC` need to have a model in their context. `with` activates the context of a particular model within its block.

# In[4]:

with model:
    start = pm.find_MAP()
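
# A plausible continuation (an assumption; the notebook excerpt is truncated
# here): sample with NUTS from the MAP starting point.

with model:
    step = pm.NUTS(scaling=start)
    trace = pm.sample(1000, step, start=start)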
Example #9
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
returns = pd.read_csv("https://raw.githubusercontent.com/pymc-devs/pymc3/master/pymc3/examples/data/SP500.csv",
                      header=None, parse_dates=True)[2500:2900]

#plt.style.use('ggplot')
returns.columns = ['S&P500']  # the observed series is selected by this name below
#returns.plot(figsize=(12,7), c="b")
#plt.show()

from pymc3 import Exponential, T, exp, Deterministic, Model, sample, NUTS, find_MAP, traceplot
from pymc3.distributions.timeseries import GaussianRandomWalk

with Model() as sp500_model:
    nu = Exponential('nu', 1./10, testval=5.)
    sigma = Exponential('sigma', 1./.02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2*s))
    r = T('r', nu, lam=1/volatility_process, observed=returns['S&P500'])
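
# A plausible continuation (an assumption; this snippet stops after the model
# definition, and Example #14 shows a fuller version): optimize for a starting
# point, then sample.
with sp500_model:
    start = find_MAP(vars=[s])
    step = NUTS(scaling=start)
    trace = sample(100, step)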



Example #11
# Assumes the (truncated) preceding context defines eta, N, x1, x2, Groups and
# NGroups, and imports numpy as np, pymc3 as pm, and scipy.stats.bernoulli.
mu = 1.0 / (1.0 + np.exp(-eta))
y = bernoulli.rvs(mu, size=N)

with pm.Model() as model:
    # Define priors
    sigma = pm.Uniform('sigma', 0, 100)
    sigma_a = pm.Uniform('sigma_a', 0, 10)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),  # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)  # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood
    y = pm.Normal('y', mu=1.0 / (1.0 + pm.exp(-eta)), sd=sigma, observed=y)

    # Fit
    start = pm.find_MAP()  # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(7000, step, start=start)

# Print summary to screen
pm.summary(trace)
Example #12
print("N: ", N)

# initial prior
# both D and A have mean 1 and std 10
alpha_A = 400.0 / 16.0
beta_A = 1.0 / 16.0
alpha_N = 400.0 / 16.0
beta_N = 1.0 / 16.0
alpha_D = 2.0 + 1.0 / 1.6
beta_D = 100 * (alpha_D - 1)
delta_t = 0.802

with pm.Model() as model:
    D = pm.InverseGamma('D', alpha=alpha_D, beta=beta_D)
    A = pm.Gamma('A', alpha=alpha_A, beta=beta_A)
    B = pm.Deterministic('B', pm.exp(-delta_t * D / A))

    # `lcm` is a user-supplied module providing an Ornstein_Uhlenbeck
    # distribution; `time_series` is the observed data, defined earlier.
    path = lcm.Ornstein_Uhlenbeck('path', D=D, A=A, B=B, observed=time_series)

    start = pm.find_MAP(fmin=sp.optimize.fmin_powell)

    trace = pm.sample(100000, start=start)

pm.summary(trace)

data_dict = {
    'D': trace['D'],
    'A': trace['A'],
    'B': trace['B'],
}
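
# A plausible follow-up (an assumption, not part of the original source):
# collect the samples into a DataFrame and summarize them; assumes
# `import pandas as pd`.
df = pd.DataFrame(data_dict)
print(df.describe())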
Example #13
# Assumes the (truncated) preceding context defines eta, N, x1, x2, Groups and
# NGroups, and imports numpy as np, pymc3 as pm, and scipy.stats.poisson.
mu = np.exp(eta)

y = poisson.rvs(mu, size=N)

with pm.Model() as model:
    # Define priors
    sigma_a = pm.Uniform('sigma_a', 0, 100)
    beta1 = pm.Normal('beta1', 0, sd=100)
    beta2 = pm.Normal('beta2', 0, sd=100)
    beta3 = pm.Normal('beta3', 0, sd=100)

    # priors for random intercept (RI) parameters
    a_param = pm.Normal(
        'a_param',
        np.repeat(0, NGroups),  # mean
        sd=np.repeat(sigma_a, NGroups),  # standard deviation
        shape=NGroups)  # number of RI parameters

    eta = beta1 + beta2 * x1 + beta3 * x2 + a_param[Groups]

    # Define likelihood
    y = pm.Poisson('y', mu=pm.exp(eta), observed=y)

    # Fit
    start = pm.find_MAP()  # Find starting value by optimization
    step = pm.NUTS(state=start)  # Initiate sampling
    trace = pm.sample(20000, step, start=start, progressbar=False)

# Print summary to screen
pm.summary(trace)
Example #14
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize
import pymc3
from pymc3.distributions import timeseries as ts

#print(len(returns))

n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples',
                                            "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %')

with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1. / 10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1. / .02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pymc3.Deterministic('volatility_process',
                                             pymc3.exp(-2 * s))
    r = pymc3.StudentT('r', nu, lam=1 / volatility_process, observed=returns)

with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)

    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)

pymc3.traceplot(trace, [nu, sigma])
Example #15
# Assumes the same setup as Example #14: numpy, matplotlib.pyplot as plt,
# scipy.optimize, pymc3, and pymc3.distributions.timeseries as ts.
#returns = pd.read_csv('SP500.csv')
#print(len(returns))

n = 400
returns = np.genfromtxt(pymc3.get_data_file('pymc3.examples', "data/SP500.csv"))[-n:]
returns[:5]

plt.plot(returns)
plt.ylabel('daily returns in %');


with pymc3.Model() as sp500_model:
    nu = pymc3.Exponential('nu', 1./10, testval=5.)
    sigma = pymc3.Exponential('sigma', 1./.02, testval=.1)
    s = ts.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process =  pymc3.Deterministic('volatility_process', pymc3.exp(-2*s))
    r = pymc3.StudentT('r', nu, lam=1/volatility_process, observed=returns)
    
with sp500_model:
    print('optimizing...')
    start = pymc3.find_MAP(vars=[s], fmin=scipy.optimize.fmin_l_bfgs_b)
    
    print('sampling... (slow!)')
    step = pymc3.NUTS(scaling=start)
    trace = pymc3.sample(100, step, progressbar=False)

    # Start next run at the last sampled position.
    step = pymc3.NUTS(scaling=trace[-1], gamma=.25)
    trace = pymc3.sample(1000, step, start=trace[-1], progressbar=False)
    