def test_moment(self, mu, sigma, init, steps, size, expected):
    with Model() as model:
        GaussianRandomWalk("x", mu=mu, sigma=sigma, init=init, steps=steps, size=size)
    assert_moment_is_expected(model, expected)
def test_gaussianrandomwalk_inference(self):
    mu, sigma, steps = 2, 1, 1000
    obs = np.concatenate([[0], np.random.normal(mu, sigma, size=steps)]).cumsum()

    with pm.Model():
        _mu = pm.Uniform("mu", -10, 10)
        _sigma = pm.Uniform("sigma", 0, 10)

        obs_data = pm.MutableData("obs_data", obs)
        grw = GaussianRandomWalk("grw", _mu, _sigma, steps=steps, observed=obs_data)

        trace = pm.sample(chains=1)

    recovered_mu = trace.posterior["mu"].mean()
    recovered_sigma = trace.posterior["sigma"].mean()
    np.testing.assert_allclose([mu, sigma], [recovered_mu, recovered_sigma], atol=0.2)
def test_inferred_steps_from_observed(self):
    with pm.Model():
        x = GaussianRandomWalk("x", observed=np.zeros(10))
    steps = x.owner.inputs[-1]
    assert steps.eval() == 9
def test_inferred_steps_from_dims(self):
    with pm.Model(coords={"batch": range(5), "steps": range(20)}):
        x = GaussianRandomWalk("x", dims=("batch", "steps"))
    steps = x.owner.inputs[-1]
    assert steps.eval() == 19
def test_inconsistent_steps_and_shape(self):
    with pytest.raises(AssertionError, match="Steps do not match last shape dimension"):
        x = GaussianRandomWalk.dist(steps=12, shape=45)
def test_missing_steps(self, shape):
    with pytest.raises(ValueError, match="Must specify steps or shape parameter"):
        GaussianRandomWalk.dist(shape=shape)
def test_inferred_steps_from_shape(self, shape):
    x = GaussianRandomWalk.dist(shape=shape)
    steps = x.owner.inputs[-1]
    assert steps.eval() == 5
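# Usage sketch of the behaviour the tests above pin down, assuming the PyMC
# v4-era API that they exercise (GaussianRandomWalk.dist, pm.draw). The number
# of steps can be given explicitly or inferred as "last shape dimension - 1",
# since the init value occupies the first position of the series. This is an
# illustrative sketch, not part of the test suite.
import pymc as pm
from pymc.distributions.timeseries import GaussianRandomWalk

explicit = GaussianRandomWalk.dist(mu=0.0, sigma=1.0, steps=9)       # 10 values: init + 9 steps
inferred = GaussianRandomWalk.dist(mu=0.0, sigma=1.0, shape=(10,))   # steps inferred as 9
walk = pm.draw(explicit)                                             # one simulated walk of length 10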
def realdata():
    import warnings
    warnings.simplefilter("ignore")

    import zipline
    import pytz
    import datetime as dt

    # load GLD (gold ETF) and GDX (gold miners ETF) close prices
    data = zipline.data.load_from_yahoo(
        stocks=["GLD", "GDX"],
        end=dt.datetime(2014, 3, 15, 0, 0, 0, 0, pytz.utc)).dropna()
    data.info()
    data.plot(figsize=(8, 4))

    data.ix[-1] / data.ix[0] - 1
    data.corr()
    data.index

    import matplotlib as mpl
    mpl_dates = mpl.dates.date2num(data.index)
    mpl_dates

    # scatter plot of GDX vs. GLD, colored by date
    plt.figure(figsize=(8, 4))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter("%d %b %y"))

    # Bayesian linear regression of GLD on GDX with constant coefficients
    with pm.Model() as model:
        alpha = pm.Normal("alpha", mu=0, sd=20)
        beta = pm.Normal("beta", mu=0, sd=20)
        sigma = pm.Uniform("sigma", lower=0, upper=50)

        y_est = alpha + beta * data["GDX"].values
        likelihood = pm.Normal("GLD", mu=y_est, sd=sigma,
                               observed=data["GLD"].values)

        start = pm.find_MAP()
        step = pm.NUTS(state=start)
        trace = pm.sample(100, step, start=start, progressbar=False)

    fig = pm.traceplot(trace)
    plt.figure(figsize=(8, 8))

    # regression lines implied by the posterior samples
    plt.figure(figsize=(8, 4))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    for i in range(len(trace)):
        plt.plot(data["GDX"], trace["alpha"][i] + trace["beta"][i] * data["GDX"])
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter("%d %b %y"))

    # time-varying coefficients modeled as Gaussian random walks
    model_randomwalk = pm.Model()
    with model_randomwalk:
        # std of random walk best sampled in log space
        sigma_alpha, log_sigma_alpha = model_randomwalk.TransformedVar(
            "sigma_alpha", pm.Exponential.dist(1. / .02, testval=.1),
            pm.logtransform)
        sigma_beta, log_sigma_beta = model_randomwalk.TransformedVar(
            "sigma_beta", pm.Exponential.dist(1. / .02, testval=.1),
            pm.logtransform)

    from pymc.distributions.timeseries import GaussianRandomWalk

    # to make the model simpler, we will apply the same coefficients
    # to 50 data points at a time
    subsample_alpha = 50
    subsample_beta = 50

    with model_randomwalk:
        alpha = GaussianRandomWalk("alpha", sigma_alpha**-2,
                                   shape=len(data) / subsample_alpha)
        beta = GaussianRandomWalk("beta", sigma_beta**-2,
                                  shape=len(data) / subsample_beta)

        # make coefficients have the same length as prices
        alpha_r = np.repeat(alpha, subsample_alpha)
        beta_r = np.repeat(beta, subsample_beta)

    len(data.dropna().GDX.values)

    with model_randomwalk:
        # define regression
        regression = alpha_r + beta_r * data.GDX.values[:1950]

        # assume prices are normally distributed
        # the mean comes from the regression
        sd = pm.Uniform("sd", 0, 20)
        likelihood = pm.Normal("GLD", mu=regression, sd=sd,
                               observed=data.GLD.values[:1950])

    import scipy.optimize as sco

    with model_randomwalk:
        # first optimize random walk
        start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)

        # sampling
        step = pm.NUTS(scaling=start)
        trace_rw = pm.sample(100, step, start=start, progressbar=False)

    np.shape(trace_rw["alpha"])

    # plot the evolution of alpha and beta over time
    part_dates = np.linspace(min(mpl_dates), max(mpl_dates), 39)
    fig, ax1 = plt.subplots(figsize=(10, 5))
    plt.plot(part_dates, np.mean(trace_rw["alpha"], axis=0),
             "b", lw=2.5, label="alpha")
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw["alpha"][i], "b-.", lw=0.75)
    plt.xlabel("date")
    plt.ylabel("alpha")
    plt.axis("tight")
    plt.grid(True)
    plt.legend(loc=2)
    ax1.xaxis.set_major_formatter(mpl.dates.DateFormatter("%d %b %y"))

    ax2 = ax1.twinx()
    plt.plot(part_dates, np.mean(trace_rw["beta"], axis=0),
             "r", lw=2.5, label="beta")
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw["beta"][i], "r-.", lw=0.75)
    plt.ylabel("beta")
    plt.legend(loc=4)
    fig.autofmt_xdate()

    # posterior-mean regression lines for each of the 39 coefficient intervals
    plt.figure(figsize=(10, 5))
    plt.scatter(data["GDX"], data["GLD"], c=mpl_dates, marker="o")
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter("%d %b %y"))
    plt.grid(True)
    plt.xlabel("GDX")
    plt.ylabel("GLD")
    x = np.linspace(min(data["GDX"]), max(data["GDX"]))
    for i in range(39):
        alpha_rw = np.mean(trace_rw["alpha"].T[i])
        beta_rw = np.mean(trace_rw["beta"].T[i])
        plt.plot(x, alpha_rw + beta_rw * x, color=plt.cm.jet(256 * i / 39))
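# Note: zipline.data.load_from_yahoo, used in realdata() above, relied on the
# retired Yahoo Finance endpoint and no longer works in current zipline
# releases, so realdata() will not run unmodified today. A hedged sketch of
# loading comparable GLD/GDX close prices with the yfinance package instead
# (the package choice and column handling are assumptions, not part of the
# original example):
def load_gld_gdx_sketch():
    import yfinance as yf
    # daily close prices for both ETFs up to the date used in the example
    return yf.download(["GLD", "GDX"], end="2014-03-15")["Close"].dropna()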
plt.show()


# stochastic volatility model for NIKKEI index returns
returns.plot(title='return of NIKKEI index close price', figsize=(30, 8))
nreturns = np.array(returns[1:])[::-1]

import pymc as pm
from pymc.distributions.timeseries import GaussianRandomWalk
from scipy.sparse import csc_matrix
from scipy import optimize

with pm.Model() as model:
    # prior on the scale of the log-volatility random walk, sampled in log space
    sigma, log_sigma = model.TransformedVar(
        'sigma', pm.Exponential.dist(1. / .02, testval=.1),
        pm.logtransform)

    nu = pm.Exponential('nu', 1. / 10)
    # latent log-volatility process
    s = GaussianRandomWalk('s', sigma**-2, shape=len(nreturns))
    # Student-t likelihood for the returns
    r = pm.T('r', nu, lam=pm.exp(-2 * s), observed=nreturns)

with model:
    start = pm.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)
    step = pm.NUTS(scaling=start)
    trace = pm.sample(2000, step, start, progressbar=False)

plt.plot(trace[s][::10].T, 'b', alpha=.03)
plt.title('log volatility')

with model:
    pm.traceplot(trace, model.vars[:2])

exps = np.exp(trace[s][::10].T)
plt.plot(returns[:600][::-1])
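# The volatility example above uses the pre-2015 PyMC3 API
# (model.TransformedVar, pm.T, pm.exp, trace[s]); none of those names exist in
# current pymc releases. Below is a rough, assumption-laden re-expression in
# the modern pymc (v4+) API, not the original model: the old code passes a
# precision (sigma**-2) to GaussianRandomWalk, whereas here an Exponential
# prior is placed directly on the step standard deviation, so the priors are
# not identical.
def stochastic_volatility_sketch(nreturns):
    import pymc as pm

    with pm.Model() as sv_model:
        step_sigma = pm.Exponential("step_sigma", 50.0)   # prior on random-walk step scale
        nu = pm.Exponential("nu", 0.1)                    # degrees of freedom of the returns
        s = pm.GaussianRandomWalk("s", sigma=step_sigma,
                                  shape=len(nreturns))    # latent log volatility
        pm.StudentT("r", nu=nu, lam=pm.math.exp(-2 * s),
                    observed=nreturns)                    # heavy-tailed return likelihood
        idata = pm.sample(1000, tune=1000)
    return idata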