def get_city_data():
    """Load the Minnesota radon survey and merge in county-level data.

    Returns a DataFrame of MN household measurements joined with county
    data on the FIPS code, with a log-radon column (``lradon``) and a
    dense integer ``group`` id per county.
    """
    data = pd.read_csv(pm.get_data("srrs2.dat"))
    cty_data = pd.read_csv(pm.get_data("cty.dat"))

    data = data[data.state == "MN"]

    # Build a county FIPS key common to both tables.
    data["fips"] = data.stfips * 1000 + data.cntyfips
    cty_data["fips"] = cty_data.stfips * 1000 + cty_data.ctfips
    # Clamp zero activity to 0.1 so the log is finite.
    data["lradon"] = np.log(np.where(data.activity == 0, 0.1, data.activity))
    data = data.merge(cty_data, "inner", on="fips")

    # Map each county FIPS code to a dense 0..n-1 group index.
    unique = data[["fips"]].drop_duplicates()
    unique["group"] = np.arange(len(unique))
    # BUGFIX: the original called ``unique.set_index("fips")`` and discarded
    # the result (set_index is not in-place), a pure no-op — removed.
    return data.merge(unique, "inner", on="fips")
def get_city_data():
    """Load the Minnesota radon survey and merge in county-level data.

    Returns a DataFrame of MN household measurements joined with county
    data on the FIPS code, with a log-radon column (``lradon``) and a
    dense integer ``group`` id per county.
    """
    data = pd.read_csv(pm.get_data('srrs2.dat'))
    cty_data = pd.read_csv(pm.get_data('cty.dat'))

    data = data[data.state == 'MN']

    # Build a county FIPS key common to both tables.
    data['fips'] = data.stfips * 1000 + data.cntyfips
    cty_data['fips'] = cty_data.stfips * 1000 + cty_data.ctfips
    # Clamp zero activity to 0.1 so the log is finite.
    data['lradon'] = np.log(np.where(data.activity == 0, .1, data.activity))
    data = data.merge(cty_data, 'inner', on='fips')

    # Map each county FIPS code to a dense 0..n-1 group index.
    unique = data[['fips']].drop_duplicates()
    unique['group'] = np.arange(len(unique))
    # BUGFIX: the original called ``unique.set_index('fips')`` and discarded
    # the result (set_index is not in-place), a pure no-op — removed.
    return data.merge(unique, 'inner', on='fips')
def time_glm_hierarchical(self):
    """Benchmark: build and sample a hierarchical GLM of radon by county."""
    data = pd.read_csv(pm.get_data('radon.csv'))
    data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
    county_idx = data.county_code.values
    n_counties = len(data.county.unique())
    with pm.Model():
        # Hyperpriors for group nodes
        mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)
        mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
        sigma_b = pm.HalfCauchy('sigma_b', 5)
        # Intercept for each county, distributed around group mean mu_a.
        # Above we just set mu and sd to a fixed value while here we
        # plug in a common group distribution for all a and b (which are
        # vectors of length n_counties).
        a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
        # Slope for each county, distributed around group mean mu_b
        b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
        # Model error
        eps = pm.HalfCauchy('eps', 5)
        radon_est = a[county_idx] + b[county_idx] * data.floor.values
        # Data likelihood
        pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
        pm.sample(draws=2000, njobs=4)
def setup(self, step, init):
    """Initialize model and get start position"""
    # Fixed seed so every benchmark run starts from the same positions.
    np.random.seed(123)
    self.chains = 4
    data = pd.read_csv(pm.get_data('radon.csv'))
    data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
    county_idx = data.county_code.values
    n_counties = len(data.county.unique())
    with pm.Model() as self.model:
        # Hyperpriors for the per-county intercepts (a) and slopes (b).
        mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)
        mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
        sigma_b = pm.HalfCauchy('sigma_b', 5)
        a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
        b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
        # Observation noise.
        eps = pm.HalfCauchy('eps', 5)
        radon_est = a[county_idx] + b[county_idx] * data.floor.values
        pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
        # Compute NUTS start points for each chain with the requested
        # initialization method; sampling itself happens elsewhere.
        self.start, _ = pm.init_nuts(chains=self.chains, init=init)
def build_model(self):
    """Bayesian logistic regression for the arsenic well-switching data."""
    data = pd.read_csv(pm.get_data('wells.dat'),
                       delimiter=u' ', index_col=u'id',
                       dtype={u'switch': np.int8})
    # Rescale predictors: distance to 100 m units, education to 4-year blocks.
    data.dist /= 100
    data.educ /= 4
    col = data.columns
    # Drop the response (first column), mean-center, add an intercept column.
    P = data[col[1:]]
    P -= P.mean()
    P['1'] = 1
    with pm.Model() as model:
        # tau=100**-2 is the precision of a Normal with sd=100 (weak prior).
        effects = pm.Normal('effects', mu=0, tau=100. ** -2,
                            shape=len(P.columns))
        p = tt.nnet.sigmoid(tt.dot(floatX(np.array(P)), effects))
        pm.Bernoulli('s', p, observed=floatX(np.array(data.switch)))
    return model
def build_model(self):
    """Bayesian logistic regression for the arsenic well-switching data.

    Loads ``wells.dat``, rescales distance/education, mean-centers the
    predictors, appends an intercept column, and returns a pm.Model with
    Normal(0, 100) coefficient priors and a Bernoulli likelihood (logits)
    on the ``switch`` response.
    """
    wells = pd.read_csv(pm.get_data('wells.dat'), delimiter=' ',
                        index_col='id', dtype={'switch': np.int8})
    # Rescale to sensible units (100 m, 4-year education blocks).
    wells.dist /= 100
    wells.educ /= 4

    # Drop the response (first column), center, and add an intercept.
    predictors = wells[wells.columns[1:]]
    predictors -= predictors.mean()
    predictors['1'] = 1

    with pm.Model() as model:
        effects = pm.Normal('effects', mu=0, sigma=100,
                            shape=len(predictors.columns))
        logit_p = tt.dot(floatX(np.array(predictors)), effects)
        pm.Bernoulli('s', logit_p=logit_p,
                     observed=floatX(wells.switch.values))
    return model
def build_model():
    """Hierarchical partial-pooling model of batting averages.

    Loads the Efron-Morris 1975 at-bats/hits data and returns a pm.Model
    with a Uniform prior on the pooled mean ``phi``, a Pareto prior on the
    concentration ``kappa`` bounded below at 1, per-player Beta ``thetas``,
    and a Binomial likelihood on observed hits.
    """
    raw = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'),
                     delimiter="\t", skiprows=1, usecols=(2, 3))
    atbats, hits = pm.floatX(raw[:, 0]), pm.floatX(raw[:, 1])
    N = len(hits)

    # we want to bound the kappa below
    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)

    with pm.Model() as model:
        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
        # Reparameterize Beta(alpha, beta) in terms of mean phi and
        # concentration kappa.
        thetas = pm.Beta('thetas', alpha=phi * kappa,
                         beta=(1.0 - phi) * kappa, shape=N)
        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
    return model
def build_model(self):
    """Bayesian logistic regression for the arsenic well-switching data."""
    data = pd.read_csv(
        pm.get_data("wells.dat"),
        delimiter=" ",
        index_col="id",
        dtype={"switch": np.int8},
    )
    # Rescale predictors: distance to 100 m units, education to 4-year blocks.
    data.dist /= 100
    data.educ /= 4
    col = data.columns
    # Drop the response (first column), mean-center, add an intercept column.
    P = data[col[1:]]
    P -= P.mean()
    P["1"] = 1
    with pm.Model() as model:
        effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns))
        # Pass logits directly; more numerically stable than sigmoid + p.
        logit_p = at.dot(floatX(np.array(P)), effects)
        pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values))
    return model
def build_model():
    """Hierarchical partial-pooling model of Efron-Morris batting data."""
    data = np.loadtxt(
        pm.get_data("efron-morris-75-data.tsv"), delimiter="\t", skiprows=1, usecols=(2, 3)
    )
    atbats = pm.floatX(data[:, 0])
    hits = pm.floatX(data[:, 1])
    N = len(hits)
    # we want to bound the kappa below
    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
    with pm.Model() as model:
        # phi: pooled hit probability; kappa: concentration of the Beta prior.
        phi = pm.Uniform("phi", lower=0.0, upper=1.0)
        kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5)
        # Beta reparameterized by mean (phi) and concentration (kappa).
        thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
        ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits)
    return model
def time_glm_hierarchical(self):
    """Benchmark: build and sample a hierarchical GLM of radon by county."""
    data = pd.read_csv(pm.get_data('radon.csv'))
    data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
    county_idx = data.county_code.values
    n_counties = len(data.county.unique())
    with pm.Model():
        # Hyperpriors for the per-county intercepts (a) and slopes (b).
        mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)
        mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
        sigma_b = pm.HalfCauchy('sigma_b', 5)
        a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
        b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
        # Observation noise.
        eps = pm.HalfCauchy('eps', 5)
        # Linear predictor: county intercept + county slope * floor indicator.
        radon_est = a[county_idx] + b[county_idx] * data.floor.values
        pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
        pm.sample(draws=2000, njobs=4)
def glm_hierarchical_model(random_seed=123):
    """Sample glm hierarchical model to use in benchmarks"""
    np.random.seed(random_seed)
    data = pd.read_csv(pm.get_data("radon.csv"))
    data["log_radon"] = data["log_radon"].astype(aesara.config.floatX)
    county_idx = data.county_code.values
    n_counties = len(data.county.unique())
    with pm.Model() as model:
        # Hyperpriors for the per-county intercepts (a) and slopes (b).
        mu_a = pm.Normal("mu_a", mu=0.0, sd=100**2)
        sigma_a = pm.HalfCauchy("sigma_a", 5)
        mu_b = pm.Normal("mu_b", mu=0.0, sd=100**2)
        sigma_b = pm.HalfCauchy("sigma_b", 5)
        # Non-centered parameterization: sample standard normals, then
        # scale/shift by the hyperpriors — easier geometry for NUTS.
        a = pm.Normal("a", mu=0, sd=1, shape=n_counties)
        b = pm.Normal("b", mu=0, sd=1, shape=n_counties)
        a = mu_a + sigma_a * a
        b = mu_b + sigma_b * b
        # Observation noise.
        eps = pm.HalfCauchy("eps", 5)
        radon_est = a[county_idx] + b[county_idx] * data.floor.values
        pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon)
    return model
def glm_hierarchical_model(random_seed=123):
    """Sample glm hierarchical model to use in benchmarks"""
    np.random.seed(random_seed)
    data = pd.read_csv(pm.get_data('radon.csv'))
    data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
    county_idx = data.county_code.values
    n_counties = len(data.county.unique())
    with pm.Model() as model:
        # Hyperpriors for the per-county intercepts (a) and slopes (b).
        mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
        sigma_a = pm.HalfCauchy('sigma_a', 5)
        mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
        sigma_b = pm.HalfCauchy('sigma_b', 5)
        # Non-centered parameterization: sample standard normals, then
        # scale/shift by the hyperpriors — easier geometry for NUTS.
        a = pm.Normal('a', mu=0, sd=1, shape=n_counties)
        b = pm.Normal('b', mu=0, sd=1, shape=n_counties)
        a = mu_a + sigma_a * a
        b = mu_b + sigma_b * b
        # Observation noise.
        eps = pm.HalfCauchy('eps', 5)
        radon_est = a[county_idx] + b[county_idx] * data.floor.values
        pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
    return model
""" Getting started with pymc3. Based on https://docs.pymc.io/notebooks/getting_started.html """ # %% import pandas as pd import pymc3 as pm import matplotlib.pyplot as plt import numpy as np returns = pd.read_csv( pm.get_data("SP500.csv"), parse_dates=True, index_col=0, usecols=["Date", "change"] ).query("Date < '2009-12-31'") returns # %% returns.plot(figsize=(10, 6)) plt.ylabel("daily returns in %") # %% with pm.Model() as sp500_model: nu = pm.Exponential("nu", 1 / 10.0, testval=5.0) sigma = pm.Exponential("sigma", 1 / 0.02, testval=0.1) s = pm.GaussianRandomWalk("s", sigma=sigma, shape=len(returns)) volatility_process = pm.Deterministic( "volatility_process", pm.math.exp(-2 * s) ** 0.5 ) r = pm.StudentT("r", nu=nu, sigma=volatility_process, observed=returns["change"])
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd
import theano

"""
Hierachical GLM.
"""

# Radon measurements by household, with county identifiers.
data = pd.read_csv(pm.get_data("radon.csv"))
data["log_radon"] = data["log_radon"].astype(theano.config.floatX)
county_names = data.county.unique()
county_idx = data.county_code.values
n_counties = len(data.county.unique())

# Unpooled (non-hierarchical model)
with pm.Model() as unpooled_model:
    # Independent parameters for each county
    a = pm.Normal("a", 0, sigma=100, shape=n_counties)
    # BUGFIX: the slope was registered as pm.Normal("a", ...) — a duplicate
    # variable name, which PyMC rejects. It must be named "b".
    b = pm.Normal("b", 0, sigma=100, shape=n_counties)

    # Model error
    eps = pm.HalfCauchy("eps", 5)

    # Model prediction of radon level
    # a[county_idx] translates to a[0, 0, 0, 1, 1, ...],
    # we thus link multiple household measures of a county
#https://docs.pymc.io/notebooks/stochastic_volatility.html import numpy as np import matplotlib.pyplot as plt import seaborn as sns sns.set_context('talk') import pymc3 as pm from pymc3.distributions.timeseries import GaussianRandomWalk from scipy import optimize import pandas as pd n = 400 returns = pd.read_csv(pm.get_data("SP500.csv"), index_col='date')['change'] returns[:5] fig, ax = plt.subplots(figsize=(14, 8)) returns.plot(label='S&P500') ax.set(xlabel='time', ylabel='returns') ax.legend() with pm.Model() as model: step_size = pm.Exponential('sigma', 50.) s = GaussianRandomWalk('s', sigma=step_size, shape=len(returns)) nu = pm.Exponential('nu', .1) r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns) with model: trace = pm.sample(tune=2000, target_accept=0.9) pm.traceplot(trace, var_names=['sigma', 'nu'])
#!/usr/bin/env python # coding: utf-8 # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import numpy as np import pymc3 as pm import pandas as pd import theano data = pd.read_csv(pm.get_data('radon.csv')) data['log_radon'] = data['log_radon'].astype(theano.config.floatX) county_names = data.county.unique() county_idx = data.county_code.values n_counties = len(data.county.unique()) # In[4]: data[['county', 'log_radon', 'floor']].head()
for density estimation problem.
source: https://docs.pymc.io/notebooks/dp_mix.html
"""
from matplotlib import pyplot as plt
import numpy as np
import pymc3 as pm
import scipy as sp
import seaborn as sns
from theano import tensor as tt
import pandas as pd

SEED = 5132290  # from random.org
np.random.seed(SEED)

# Standardize the waiting times between Old Faithful eruptions.
old_faithful_df = pd.read_csv(pm.get_data('old_faithful.csv'))
old_faithful_df['std_waiting'] = (
    old_faithful_df.waiting - old_faithful_df.waiting.mean()) / old_faithful_df.waiting.std()

# fig, ax = plt.subplots(figsize=(8, 6))
# n_bins = 20
# ax.hist(old_faithful_df.std_waiting, bins=n_bins, lw=0, alpha=0.5)
# ax.set_xlabel('Standardized waiting time between eruptions')
# ax.set_ylabel('Number of eruptions')
# plt.show()

N = old_faithful_df.shape[0]
# num of components = num of data points
# # Demonstrates the usage of hierarchical partial pooling # See http://mc-stan.org/documentation/case-studies/pool-binary-trials.html for more details # import pymc3 as pm import numpy as np import theano data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", skiprows=1, usecols=(2, 3)) atBats = data[:, 0].astype(theano.config.floatX) hits = data[:, 1].astype(theano.config.floatX) N = len(hits) model = pm.Model() # we want to bound the kappa below BoundedKappa = pm.Bound(pm.Pareto, lower=1.0) with model: phi = pm.Uniform('phi', lower=0.0, upper=1.0) kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5) thetas = pm.Beta('thetas', alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
import pymc3 as pm
import pandas as pd
from numpy.ma import masked_values

# Import data, filling missing values with sentinels (-999)
test_scores = pd.read_csv(pm.get_data("test_scores.csv")).fillna(-999)

# Extract variables: test score, gender, number of siblings, previous disability, age,
# mother with HS education or better, hearing loss identified by 3 months
# of age
(score, male, siblings, disability, age, mother_hs, early_ident) = (test_scores[[
    "score", "male", "siblings", "prev_disab", "age_test", "mother_hs", "early_ident"
]].astype(float).values.T)

with pm.Model() as model:
    # Impute missing values
    sib_mean = pm.Exponential("sib_mean", 1.0)
    # BUGFIX: mask the -999 sentinels so missing sibling counts are imputed
    # rather than fed to the Poisson likelihood as literal negative counts,
    # matching the treatment of the other imputed variables below.
    siblings_imp = pm.Poisson("siblings_imp", sib_mean,
                              observed=masked_values(siblings, value=-999))

    p_disab = pm.Beta("p_disab", 1.0, 1.0)
    disability_imp = pm.Bernoulli("disability_imp", p_disab,
                                  observed=masked_values(disability, value=-999))

    p_mother = pm.Beta("p_mother", 1.0, 1.0)
    mother_imp = pm.Bernoulli("mother_imp", p_mother,
                              observed=masked_values(mother_hs, value=-999))
import pymc3 as pm
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

plt.style.use('seaborn-darkgrid')

# MovieLens 100k ratings: one row per (user, item) rating event.
data = pd.read_csv(pm.get_data("ml_100k_u.data"), sep='\t',
                   names=["userid", "itemid", "rating", "timestamp"])

# Column names for the u.item movie-metadata file (pipe-separated).
movie_columns = [
    'movie id', 'movie title', 'release date', 'video release date',
    'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
    "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
    'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
    'Thriller', 'War', 'Western'
]
movies = pd.read_csv(pm.get_data("ml_100k_u.item"), sep="|",
                     names=movie_columns, index_col="movie id",
                     parse_dates=['release date'])

# Extract the ratings from the DataFrame
ratings = data.rating
# Mean rating per movie title and per user.
movie_means = data.join(movies['movie title'], on='itemid').groupby('movie title').rating.mean()
user_means = data.groupby('userid').rating.mean().sort_values()
import pymc3 as pm
import pandas as pd
from numpy.ma import masked_values

# Import data, filling missing values with sentinels (-999)
test_scores = pd.read_csv(pm.get_data('test_scores.csv')).fillna(-999)

# Extract variables: test score, gender, number of siblings, previous disability, age,
# mother with HS education or better, hearing loss identified by 3 months
# of age
(score, male, siblings, disability, age, mother_hs,
 early_ident) = test_scores[['score', 'male', 'siblings', 'prev_disab',
                             'age_test', 'mother_hs',
                             'early_ident']].astype(float).values.T

with pm.Model() as model:
    # Impute missing values
    sib_mean = pm.Exponential('sib_mean', 1.)
    # BUGFIX: mask the -999 sentinels so missing sibling counts are imputed
    # rather than fed to the Poisson likelihood as literal negative counts,
    # matching the treatment of the other imputed variables below.
    siblings_imp = pm.Poisson('siblings_imp', sib_mean,
                              observed=masked_values(siblings, value=-999))

    p_disab = pm.Beta('p_disab', 1., 1.)
    disability_imp = pm.Bernoulli(
        'disability_imp', p_disab,
        observed=masked_values(disability, value=-999))

    p_mother = pm.Beta('p_mother', 1., 1.)
    mother_imp = pm.Bernoulli('mother_imp', p_mother,
                              observed=masked_values(mother_hs, value=-999))

    # Weakly-informative priors for the regression coefficients and scale.
    s = pm.HalfCauchy('s', 5., testval=5)
    beta = pm.Laplace('beta', 0., 100., shape=7, testval=.1)
# of the unpooled county estimates and the pooled estimates. # y = a[cluster] + b[cluster] * x # adaptive prior for a[cluster] = Normal(a_bar, a_sigma_bar) # adaptive prior for b[cluster] = Normal(b_bar, b_sigma_bar) # prior for a_bar # prior for a_sigma_bar # prior for b_bar # prior for b_sigma_bar # partially pooled taking into account the relationship between a and b in the data # by drawin on the variance-covariance matrix for the parameters a and b # so here the adaptive prior uses this, draws from this? # ----------------------------------------------------------------------------- # Import radon data srrs2 = pd.read_csv(get_data('srrs2.dat')) srrs2.columns = srrs2.columns.map(str.strip) srrs_mn = srrs2[srrs2.state == 'MN'].copy() srrs_mn.shape srrs_mn.head() srrs_mn['fips'] = srrs_mn.stfips * 1000 + srrs_mn.cntyfips cty = pd.read_csv(get_data('cty.dat')) cty_mn = cty[cty.st == 'MN'].copy() cty_mn['fips'] = 1000 * cty_mn.stfips + cty_mn.ctfips srrs_mn = srrs_mn.merge(cty_mn[['fips', 'Uppm']], on='fips') srrs_mn = srrs_mn.drop_duplicates(subset='idnum') u = np.log(srrs_mn.Uppm) n = len(srrs_mn)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pymc3 import HalfCauchy, Model, Normal, get_data, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

# Pancreatitis rates by age group for a subset of countries.
data = pd.read_csv(get_data("pancreatitis.csv"))
countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"]
data = data[data.area.isin(countries)]

# Midpoint of each age bracket; rates rescaled to per-1000.
age = data["age"] = np.array(data.age_start + data.age_end) / 2
rate = data.value = data.value * 1000
# Dense integer group id per country (order fixed by `countries`).
group, countries = pd.factorize(data.area, order=countries)

ncountries = len(countries)

# One scatter panel per country.
for i, country in enumerate(countries):
    plt.subplot(2, 3, i + 1)
    plt.title(country)
    d = data[data.area == country]
    plt.plot(d.age, d.value, ".")
    plt.ylim(0, rate.max())

# Evenly spaced spline knots over the observed age range.
nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), nknots)


def interpolate(x0, y0, x, group):
    # NOTE(review): definition is truncated in this chunk.
    x = np.array(x)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pymc3 import HalfCauchy, Model, Normal, get_data, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

# Pancreatitis rates by age group for a subset of countries.
data = pd.read_csv(get_data('pancreatitis.csv'))
countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL']
data = data[data.area.isin(countries)]

# Midpoint of each age bracket; rates rescaled to per-1000.
age = data['age'] = np.array(data.age_start + data.age_end) / 2
rate = data.value = data.value * 1000
# Dense integer group id per country (order fixed by `countries`).
group, countries = pd.factorize(data.area, order=countries)

ncountries = len(countries)

# One scatter panel per country.
for i, country in enumerate(countries):
    plt.subplot(2, 3, i + 1)
    plt.title(country)
    d = data[data.area == country]
    plt.plot(d.age, d.value, '.')
    plt.ylim(0, rate.max())

# Evenly spaced spline knots over the observed age range.
nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), nknots)
import pandas as pd
import plotly.offline as py
import plotly.graph_objs as go
import datetime as dt
import numpy as np

py.init_notebook_mode(connected=True)


def dates_to_idx(timelist):
    """Convert datetimes to fractional years since 1958-03-15."""
    reference_time = pd.to_datetime('1958-03-15')
    # NOTE(review): Timedelta(1, "Y") is deprecated in newer pandas —
    # confirm the pandas version pinned for this script.
    t = (timelist - reference_time) / pd.Timedelta(1, "Y")
    return np.asarray(t)


# Mauna Loa monthly CO2; the file has a 56-line preamble and uses
# -99.99 as its missing-value sentinel.
data_monthly = pd.read_csv(pm.get_data("monthly_in_situ_co2_mlo.csv"), header=56)
data_monthly.replace(to_replace=-99.99, value=np.nan, inplace=True)

# Rename columns; the two "--" placeholders are dropped below.
cols = [
    "year", "month", "--", "--", "CO2", "seasonaly_adjusted", "fit",
    "seasonally_adjusted_fit", "CO2_filled", "seasonally_adjusted_filled"
]
data_monthly.columns = cols
cols.remove("--")
cols.remove("--")
data_monthly = data_monthly[cols]

# Index by mid-month date built from the year/month columns.
data_monthly["day"] = 15
data_monthly.index = pd.to_datetime(data_monthly[["year", "month", "day"]])
cols.remove("year")
# -*- coding: utf-8 -*- """ Created on Mon Oct 21 17:10:07 2019 @author: Alex https://docs.pymc.io/notebooks/hierarchical_partial_pooling.html """ import pymc3 as pm import numpy as np import matplotlib.pyplot as plt import pandas as pd import theano.tensor as tt dataBB = pd.read_csv(pm.get_data('efron-morris-75-data.tsv'), sep="\t") at_bats, hits = dataBB[['At-Bats', 'Hits']].values.T #%% N = len(hits) with pm.Model() as baseball_model: phi = pm.Uniform('phi', lower=0.0, upper=1.0) kappa_log = pm.Exponential('kappa_log', lam=1.5) kappa = pm.Deterministic('kappa', tt.exp(kappa_log)) thetas = pm.Beta('thetas', alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
#%matplotlib inline import numpy as np import pandas as pd from pymc3 import __version__ import matplotlib.pyplot as plt import seaborn as sns plt.style.use('seaborn-darkgrid') print('Running on PyMC3 v{}'.format(__version__)) from pymc3 import get_data # Import radon data srrs2 = pd.read_csv(get_data('srrs2.dat')) srrs2.columns = srrs2.columns.map(str.strip) srrs_mn = srrs2[srrs2.state == 'MN'].copy() """ import pandas as pd import numpy as np import seaborn as sns #import pandas.util.testing as tm df = pd.read_csv('inverts_database_CSV.csv', encoding = "ISO-8859-1", engine='python') df = df.drop(['Date','management', 'Ecoregion', 'den_per_HA', 'density_for_summed_transect area_#permeter'], axis=1) df = df.drop(['den_for_transect_#permeter', 'total transect area', 'Transect_area', 't_Width', 't_Length', 'Abundance', 'habitat_type', 'total number of transect'], axis= 1) df['den'] = df['den_per_100m2'] df['den'] = pd.to_numeric(df['den'], errors='coerce') # make den a float print(df.head())
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pymc3 import HalfCauchy, Model, Normal, get_data, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

# Pancreatitis rates by age group for a subset of countries.
data = pd.read_csv(get_data('pancreatitis.csv'))
countries = ['CYP', 'DNK', 'ESP', 'FIN', 'GBR', 'ISL']
data = data[data.area.isin(countries)]

# Midpoint of each age bracket; rates rescaled to per-1000.
age = data['age'] = np.array(data.age_start + data.age_end) / 2
rate = data.value = data.value * 1000
# Dense integer group id per country (order fixed by `countries`).
group, countries = pd.factorize(data.area, order=countries)

ncountries = len(countries)

# One scatter panel per country.
for i, country in enumerate(countries):
    plt.subplot(2, 3, i + 1)
    plt.title(country)
    d = data[data.area == country]
    plt.plot(d.age, d.value, '.')
    plt.ylim(0, rate.max())

# Evenly spaced spline knots over the observed age range.
nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), nknots)


def interpolate(x0, y0, x, group):
    # NOTE(review): definition is truncated in this chunk.
    x = np.array(x)
Created on Tue Jul  9 13:38:43 2019

@author: Alex
"""

import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-darkgrid')
import pymc3 as pm
print('Running on PyMC3 v{}'.format(pm.__version__))
import pandas as pd

# The Data
# Our data consist of 401 daily returns of the S&P 500 stock market index during the 2008 financial crisis.
returns = pd.read_csv(pm.get_data('SP500.csv'), parse_dates=True, index_col=0)
len(returns)

returns.plot(figsize=(10, 6))
plt.ylabel('daily returns in %')
print('\n--- The Data ---')

#%% Model Specification
# Stochastic volatility: latent log-volatility s follows a random walk;
# returns are Student-T with that time-varying scale.
with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1 / 10., testval=5.)
    sigma = pm.Exponential('sigma', 1 / 0.02, testval=.1)
    s = pm.GaussianRandomWalk('s', sigma=sigma, shape=len(returns))
    # NOTE(review): statement is truncated in this chunk.
    volatility_process = pm.Deterministic('volatility_process',
"figure.facecolor": "#fffff8", "axes.facecolor": "#fffff8", "figure.constrained_layout.use": True, "font.size": 14.0, "hist.bins": "auto", "lines.linewidth": 1.0, }) # %% import data returns = pd.read_csv(pm.get_data( "E:\\Users/Corly/Documents/GitHub/Python/self_course/dissertation/Data/SZ.csv" ), index_col="trade_date") returns["change"] = np.log(returns["close"]).diff() returns = returns.dropna() returns.head() # %% fig, ax = plt.subplots(figsize=(14, 4)) returns.plot(y="change", label="SZ", ax=ax) ax.set(xlabel="time", ylabel="returns") ax.legend() # %%