def random_regression(self):
    """Fit a random-walk regression of GLD on SLV and plot the result.

    The first 3180 rows of ``self.item`` are modelled as
    ``GLD = alpha + beta * SLV`` where ``alpha`` and ``beta`` follow Gaussian
    random walks. Each coefficient is held constant over
    ``self.subsample_alpha`` / ``self.subsample_beta`` consecutive rows.

    Two figures are drawn on the current matplotlib state: the posterior
    coefficient paths over time, and the SLV/GLD scatter with one regression
    line per coefficient segment. Nothing is returned.
    """
    model_randomwalk = pm.Model()
    item = self.item[:3180]
    mpl_dates = date2num(item.index)

    with model_randomwalk:
        sigma_alpha = pm.Exponential('sigma_alpha', 1.0 / 0.02, testval=0.1)
        sigma_beta = pm.Exponential('sigma_beta', 1.0 / 0.02, testval=0.1)
        alpha = GaussianRandomWalk(
            'alpha', sigma_alpha ** -2,
            shape=int(len(item) / self.subsample_alpha))
        beta = GaussianRandomWalk(
            'beta', sigma_beta ** -2,
            shape=int(len(item) / self.subsample_beta))

        # Stretch the per-segment coefficients so there is one per row.
        alpha_r = np.repeat(alpha, self.subsample_alpha)
        beta_r = np.repeat(beta, self.subsample_beta)
        regression = alpha_r + beta_r * item.SLV.values

        sd = pm.Uniform(name='sd', lower=0, upper=20)
        pm.Normal(name='GLD', mu=regression, sd=sd,
                  observed=item.GLD.values)

        # Optimise the random walks first and use the result both as the
        # NUTS scaling and as the starting point.
        start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)
        step = pm.NUTS(scaling=start)
        trace_rw = pm.sample(500, step, start=start, progressbar=False,
                             tune=2000)

    alpha_trace = trace_rw['alpha']
    beta_trace = trace_rw['beta']
    # Segment counts come from the trace instead of a hard-coded 53, so the
    # plots stay aligned when the subsample sizes change.
    n_alpha = alpha_trace.shape[1]
    n_beta = beta_trace.shape[1]
    first_date, last_date = min(mpl_dates), max(mpl_dates)

    fig, ax1 = plt.subplots(figsize=(10, 5))
    alpha_dates = np.linspace(first_date, last_date, n_alpha)
    plt.plot(alpha_dates, np.mean(alpha_trace, axis=0), 'b', lw=2.5,
             label='alpha')
    for i in range(10, 55):
        plt.plot(alpha_dates, alpha_trace[i], 'b-.', lw=0.75)
    plt.xlabel('date')
    plt.ylabel('alpha')
    plt.axis('tight')
    plt.grid(True)
    plt.legend(loc=2)
    ax1.xaxis.set_major_formatter(DateFormatter('%d %b %y'))

    # Second y-axis: makes the twin axes current so the beta paths land on it.
    ax1.twinx()
    beta_dates = np.linspace(first_date, last_date, n_beta)
    plt.plot(beta_dates, np.mean(beta_trace, axis=0), 'r', lw=2.5,
             label='beta')
    for i in range(10, 55):
        plt.plot(beta_dates, beta_trace[i], 'r-.', lw=0.75)
    plt.ylabel('beta')
    plt.legend(loc=4)
    fig.autofmt_xdate()

    plt.figure(figsize=(10, 5))
    plt.scatter(item['SLV'], item['GLD'], c=mpl_dates, marker='o')
    plt.colorbar(ticks=DayLocator(interval=250),
                 format=DateFormatter('%d %b %y'))
    plt.grid(True)
    plt.xlabel('SLV')
    plt.ylabel('GLD')

    x = np.linspace(min(item['SLV']), max(item['SLV']))
    n_lines = min(n_alpha, n_beta)
    for i in range(n_lines):
        alpha_rw = np.mean(alpha_trace.T[i])
        beta_rw = np.mean(beta_trace.T[i])
        # Integer index into the colormap lookup table. A float here would be
        # read as a [0, 1] fraction and clip every line to the same colour.
        plt.plot(x, alpha_rw + beta_rw * x,
                 color=plt.cm.jet(256 * i // n_lines))
def configure_sample_stoch_vol_model(log_returns, samples):
    """Build, sample and plot a stochastic volatility model with PyMC3.

    The model is configured inside a 'with' context: the log-volatility ``s``
    is a Gaussian random walk and the observed log returns are Student-T
    distributed with precision ``exp(-2 * s)``. ``pm.sample`` is called with
    its default step method.

    Two figures are shown: the sampled log-volatility paths, and the absolute
    simple returns overlaid with ``exp(s)``.

    Parameters
    ----------
    log_returns : array-like
        Observed logarithmic returns.
    samples : int
        Number of posterior draws passed to ``pm.sample``.
    """
    print("Configuring stochastic volatility with PyMC3...")
    model = pm.Model()
    with model:
        sigma = pm.Exponential('sigma', 50.0, testval=0.1)
        nu = pm.Exponential('nu', 0.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(log_returns))
        pm.StudentT(
            'logrets', nu,
            lam=pm.math.exp(-2.0*s),
            observed=log_returns
        )

    print("Fitting the stochastic volatility model...")
    with model:
        trace = pm.sample(samples)

    # Plot every k-th draw to keep the figure light.
    k = 10
    opacity = 0.03
    log_vol_paths = trace[s][::k].T

    print("Plotting the log vol")
    plt.plot(log_vol_paths, 'b', alpha=opacity)
    plt.xlabel("Time")
    plt.ylabel("Log Vol")
    plt.show()

    print("Plotting the absolute returns overlaid with vol...")
    # abs() must wrap the simple return exp(r) - 1. Taking abs(exp(r)) first
    # is a no-op because exp is always positive, which left signed returns on
    # a plot labelled "absolute".
    plt.plot(np.abs(np.exp(log_returns) - 1.0), linewidth=0.5)
    plt.plot(np.exp(log_vol_paths), 'r', alpha=opacity)
    plt.xlabel("Trading Days")
    plt.ylabel("Absolute Returns/Volatility")
    plt.show()
def stochastic_vol_model(returns):
    """Sample a stochastic volatility model and return ``exp`` of the paths.

    The latent series ``s`` is a Gaussian random walk with one entry per
    element of ``returns``. The observations are Student-T distributed with
    precision ``exp(-2 * s)``.

    Returns ``exp(trace[s].T)``, i.e. the exponentiated draws of ``s`` with
    the time axis first.
    """
    n_obs = len(returns)
    with pm.Model() as model:
        walk_sd = pm.Exponential('sigma', 50.)
        s = GaussianRandomWalk('s', sd=walk_sd, shape=n_obs)
        dof = pm.Exponential('nu', .1)
        precision = pm.math.exp(-2*s)
        pm.StudentT('r', nu=dof, lam=precision, observed=returns)

    sampler_options = {'target_accept': .9}
    with model:
        draws = pm.sample(tune=2000, nuts_kwargs=sampler_options)

    log_vol_by_time = draws[s].T
    return exp(log_vol_by_time)
def model_stoch_vol(data, samples=2000):
    """
    Fit a stochastic volatility model with a MAP-initialised NUTS sampler.

    The returns are modelled as Student-T distributed. Their precision is
    ``exp(-2 * s)``, where ``s`` follows a Gaussian random walk with one
    entry per observation. Sampling runs in two stages: a short 100-draw
    run scaled by the MAP estimate of ``s``, then the main run which is
    scaled by, and started from, the last draw of the short run.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw in the main run.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    n_obs = len(data)
    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=n_obs)
        volatility_process = pm.Deterministic(
            'volatility_process', pm.math.exp(-2 * s))
        StudentT('r', nu, lam=volatility_process, observed=data)

        # Stage 1: short warm-up run scaled by the MAP estimate of s.
        map_estimate = pm.find_MAP(vars=[s],
                                   fmin=sp.optimize.fmin_l_bfgs_b)
        warmup_step = pm.NUTS(scaling=map_estimate)
        warmup_trace = pm.sample(100, warmup_step, progressbar=False)

        # Stage 2: continue from the last warm-up draw.
        last_point = warmup_trace[-1]
        main_step = pm.NUTS(scaling=last_point, gamma=.25)
        trace = pm.sample(samples, main_step, start=warmup_trace[-1],
                          progressbar=False)

    return model, trace
def model_stoch_vol(data, samples=2000):
    """
    Fit a stochastic volatility model using PyMC3's default sampler.

    The returns are modelled as Student-T distributed. Their precision is
    ``exp(-2 * s)``, where ``s`` follows a Gaussian random walk with one
    entry per observation.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """
    from pymc3.distributions.timeseries import GaussianRandomWalk

    n_obs = len(data)
    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=n_obs)
        precision = pm.math.exp(-2 * s)
        volatility_process = pm.Deterministic('volatility_process',
                                              precision)
        StudentT('r', nu, lam=volatility_process, observed=data)

        trace = pm.sample(samples)

    return model, trace
plt.ylabel('daily returns in %') # define the model # \sig ~ exp(50) # why? stdev of returns is approx 0.02 # stdev of exp(lam=50) = 0.2 # \nu ~ exp(0.1) # the DOF for the student T...which should be sample size # mean of exp(lam=0.1) = 10 # s_i ~ normal(s_i-1, \sig^-2) # log(y_i) ~ studentT(\nu, 0, exp(-2s_i)) with Model() as sp500_model: nu = Exponential('nu', 1. / 10, testval=5.) #50, testval=5.)#results similar... sigma = Exponential('sigma', 1. / .02, testval=.1) s = GaussianRandomWalk('s', sigma**-2, shape=len(returns)) volatility_process = Deterministic('volatility_process', exp(-2 * s)) r = StudentT('r', nu, lam=1 / volatility_process, observed=returns) # fit the model using NUTS # NUTS is auto-assigned in sample()...why? # you may get an error like: # WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456') # ignore it...the process will move along with sp500_model: trace = sample(2000, progressbar=False) # plot results from model fitting... # is there a practical reason for starting the plot from 200th sample traceplot(trace[200:], [nu, sigma]) # plot the results: volatility inferred by the model
x = np.array(x) group = np.array(group) idx = np.searchsorted(x0, x) dl = np.array(x - x0[idx - 1]) dr = np.array(x0[idx] - x) d = dl + dr wl = dr / d return wl * y0[idx - 1, group] + (1 - wl) * y0[idx, group] with Model() as model: coeff_sd = HalfCauchy('coeff_sd', 5) y = GaussianRandomWalk('y', sigma=coeff_sd, shape=(nknots, ncountries)) p = interpolate(knots, y, age, group) sd = HalfCauchy('sd', 5) vals = Normal('vals', p, sigma=sd, observed=rate) def run(n=3000): if n == "short": n = 150 with model: trace = sample(n, tune=int(n / 2), init='advi+adapt_diag') for i, country in enumerate(countries):
# Plot the raw observations and save the figure to disk.
plt.plot(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Observed Data")
plt.savefig('Observed_Data.png')

# Scale of the wide priors on the drift and on the precision.
LARGE_NUMBER = 1e5

model = pm.Model()
with model:
    # Held in a shared variable so it can be changed after the model is
    # built (see infer_z) without rebuilding the model.
    smoothing_param = shared(0.9)
    mu = pm.Normal("mu", sd=LARGE_NUMBER)
    tau = pm.Exponential("tau", 1.0/LARGE_NUMBER)
    # The total precision `tau` is split between the latent random walk `z`
    # and the observation noise according to smoothing_param.
    z = GaussianRandomWalk("z", mu=mu, tau=tau / (1.0 - smoothing_param), shape=y.shape)
    obs = pm.Normal("obs", mu=z, tau=tau / smoothing_param, observed=y)


def infer_z(smoothing):
    """Return the MAP estimate of ``z`` for the given smoothing value.

    Sets the module-level ``smoothing_param`` shared variable to
    ``smoothing`` (the new value persists after the call), then optimises
    ``z`` with L-BFGS-B.
    """
    with model:
        smoothing_param.set_value(smoothing)
        res = pm.find_MAP(vars=[z], fmin=optimize.fmin_l_bfgs_b)
        return res['z']

# allocate 50% variance to the noise
# smoothing = 0.98
from pymc3.distributions.timeseries import GaussianRandomWalk
from scipy import optimize
import pandas as pd

# NOTE(review): `n` is not used anywhere in this fragment.
n = 400
# Load the 'change' column of the SP500.csv file located by pm.get_data,
# indexed by date.
returns = pd.read_csv(pm.get_data("SP500.csv"), index_col='date')['change']
# Bare expression: only displays a preview in an interactive session and has
# no effect when run as a script.
returns[:5]

# Plot the raw return series.
fig, ax = plt.subplots(figsize=(14, 8))
returns.plot(label='S&P500')
ax.set(xlabel='time', ylabel='returns')
ax.legend()

# Stochastic volatility model: the latent series `s` is a Gaussian random
# walk and the returns are Student-T distributed with precision exp(-2 * s).
with pm.Model() as model:
    # Registered under the name 'sigma'; used as the random-walk step size.
    step_size = pm.Exponential('sigma', 50.)
    s = GaussianRandomWalk('s', sigma=step_size, shape=len(returns))
    nu = pm.Exponential('nu', .1)
    r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns)

with model:
    trace = pm.sample(tune=2000, target_accept=0.9)

# Trace plots for the two scalar parameters.
pm.traceplot(trace, var_names=['sigma', 'nu'])

# Overlay all sampled paths of `s` with low opacity.
fig, ax = plt.subplots()
plt.plot(trace['s'].T, 'b', alpha=.03)
ax.set(title=str(s), xlabel='time', ylabel='log volatility')
def main(group, anType, sessionNum):
    """Fit the learning-curve model for one group and write summary files.

    Reads the per-animal trial counts (``...Denom.csv``) and correct counts
    (``...Num.csv``) for the given session, analysis type and group. It then
    fits a binomial model whose logit success probability is a shared
    Gaussian random walk ``x`` plus a per-animal offset ``beta_0``, prints
    WAIC and DIC, and writes two text files.

    The prior bounds ``sigmaMin``, ``sigmaMax``, ``sigmabMin`` and
    ``sigmabMax`` are read from module scope.

    Parameters
    ----------
    group : str
        Group label. ``'Young'`` selects figure number 1 and anything else
        figure number 2. Also used in file names.
    anType : str
        One of ``'overall'``, ``'inbound'`` or ``'outbound'``.
    sessionNum : int or str
        Session identifier used in the input directory name.

    Returns
    -------
    numpy.ndarray
        Posterior draws of the population-level probability ``p``.

    Raises
    ------
    ValueError
        If ``anType`` is not one of the three supported values. Previously
        this surfaced as a NameError only after sampling had finished.
    """
    dtype_by_analysis = {
        'overall': 'Overall',
        'inbound': 'Inbound',
        'outbound': 'Outbound',
    }
    if anType not in dtype_by_analysis:
        raise ValueError(
            'anType must be one of {0}, got {1!r}'.format(
                sorted(dtype_by_analysis), anType))
    dtype = dtype_by_analysis[anType]

    # Only referenced by the plotting calls that are currently disabled.
    fig_no = 1 if group == 'Young' else 2

    input_dir = '/Users/adelekap/Documents/WMaze_Analysis/Paper/data/'
    # csv of total trials for each day
    data_denom = pd.read_csv('{0}{1}Session/{2}{3}Denom.csv'.format(
        input_dir, str(sessionNum), anType, group))
    # correct per day
    data_numAll = pd.read_csv('{0}{1}Session/{2}{3}Num.csv'.format(
        input_dir, str(sessionNum), anType, group))
    numAnimals = len(data_numAll)

    with pm.Model() as model_old:
        sigma = pm.Uniform('sigma', float(sigmaMin), float(sigmaMax))
        sigmab = pm.Uniform('sigmab', float(sigmabMin), float(sigmabMax))
        betaPop0 = pm.Normal('betaPop0', mu=0, sd=100)
        beta_0 = pm.Normal('beta_0', mu=betaPop0, sd=sigmab,
                           shape=len(data_numAll))
        # One latent state per column of the counts table.
        x = GaussianRandomWalk('x', sd=sigma,
                               init=pm.Normal.dist(mu=0.0, sd=0.01),
                               shape=data_numAll.shape[1])
        pm.Deterministic('p', tinvlogit(x + betaPop0))

        # One binomial likelihood per animal (one row of the tables each).
        for rat in range(numAnimals):
            stp = 'p{0}'.format(rat)
            stn = 'n{0}'.format(rat)
            pn = pm.Deterministic(stp, tinvlogit(x + beta_0[rat]))
            pm.Binomial(stn, p=pn,
                        n=np.asarray(data_denom[rat:(rat + 1)]),
                        observed=np.asarray(data_numAll[rat:(rat + 1)]))

    with model_old:
        step1 = pm.NUTS(vars=[x, sigmab, beta_0], gamma=.25)
        start2 = pm.sample(2000, step1)[-1]

        # Start next run at the last sampled position.
        step2 = pm.NUTS(vars=[x, sigmab, beta_0], scaling=start2, gamma=.55)
        trace1 = pm.sample(5000, step2, start=start2, progressbar=True)

    print('---------')
    (waic_val, waic_se, waic_p) = pm.stats.waic(model=model_old,
                                                trace=trace1, n_eff=True)
    dic_val = pm.stats.dic(model=model_old, trace=trace1)
    print('WAIC ', waic_val, ' DIC ', dic_val)
    print('---------')

    # plt.figure(50)
    # pm.traceplot(trace1, varnames=['sigmab', 'beta_0', 'sigma'])
    # plt.savefig('trace' + group + '.pdf')

    # NOTE: lt1 stays empty while the plot_results call below is disabled,
    # so the learning-trials file then holds only the two summary lines.
    lt1 = {}
    for ii in range(len(data_numAll)):
        lc = 'p' + str(ii)
        summary_dataset = np.percentile(trace1[lc][:], [5, 50, 95], axis=0)
        # lt1[ii] = plot_results(np.asarray(summary_dataset), fig_no, ii + 1, group)

    dataDir = '/Users/adelekap/Documents/WMaze_Analysis/StochasticVolatility/BySession/'
    txtFile = '{0}{1}Learning/{1}_{2}_learningTrials.txt'.format(
        dataDir, dtype, group)
    # Materialise the values: a dict view is not a sequence numpy/scipy can
    # average on Python 3.
    lts = list(lt1.values())
    with open(txtFile, 'w') as learn:
        for trial in lts:
            learn.write(str(trial) + '\n')
        learn.write("AVERAGE LEARNING TRIAL: " + str(np.average(lts)) + '\n')
        learn.write("STANDARD ERROR: " + str(stats.sem(lts)))

    print("|||||||||||Completed Analysis for " + group + " data|||||||||||||")

    summary_dataset = np.percentile(trace1['p'], [5, 50, 95], axis=0)
    with open('{0}{1}DATASET.txt'.format(group, anType), 'w') as data:
        data.write(str(np.asarray(summary_dataset)))
    # plot_results(np.asarray(summary_dataset), 3, 2, group)

    return trace1['p']
def main():
    """Fit and plot Bayesian survival models for the mastectomy dataset.

    Steps:

    1. Load the ``mastectomy`` dataset (HSAUR) and plot each subject's
       follow-up time, coloured by censoring, with metastized cases marked.
    2. Split follow-up time into 3-month intervals and build the
       per-subject, per-interval death indicator and exposure matrices.
    3. Fit a piecewise-constant proportional hazards model as a Poisson
       regression with a single coefficient ``beta`` for metastization.
    4. Fit a second model where ``beta`` follows a Gaussian random walk over
       the intervals (time-varying effect).
    5. Plot cumulative hazard and survival curves for both models and show
       all figures.

    Takes no arguments and returns nothing.
    """
    # Load mastectomy dataset
    df = datasets.get_rdataset('mastectomy', 'HSAUR', cache=True).data
    # event: 1 = death observed (not censored), 0 = censored
    df.event = df.event.astype(np.int64)
    # metastized: 1 for 'yes', 0 otherwise
    df.metastized = (df.metastized == 'yes').astype(np.int64)
    n_patients = df.shape[0]
    patients = np.arange(n_patients)

    # Censoring plot: one horizontal line per subject.
    fig, ax = plt.subplots(figsize=(8, 6))
    blue, _, red = sns.color_palette()[:3]
    ax.hlines(patients[df.event.values == 0], 0,
              df[df.event.values == 0].time,
              color=blue, label='Censored')
    ax.hlines(patients[df.event.values == 1], 0,
              df[df.event.values == 1].time,
              color=red, label='Uncensored')
    # Mark the subjects whose cancer metastized.
    ax.scatter(df[df.metastized.values == 1].time,
               patients[df.metastized.values == 1],
               color='k', zorder=10, label='Metastized')
    ax.set_xlim(left=0)
    ax.set_xlabel('Months since mastectomy')
    ax.set_yticks([])
    ax.set_ylabel('Subject')
    ax.set_ylim(-0.25, n_patients + 0.25)
    ax.legend(loc='center right')

    # Risk regression model (Cox proportional hazards) on 3-month intervals.
    interval_length = 3
    interval_bounds = np.arange(0, df.time.max() + interval_length + 1,
                                interval_length)
    n_intervals = interval_bounds.size - 1

    # How deaths and censored observations are distributed over intervals.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.hist(df[df.event == 1].time.values, bins=interval_bounds,
            color=red, alpha=0.5, lw=0, label='Uncensored')
    ax.hist(df[df.event == 0].time.values, bins=interval_bounds,
            color=blue, alpha=0.5, lw=0, label='Censored')
    ax.set_xlim(0, interval_bounds[-1])
    ax.set_xlabel('Months since mastectomy')
    ax.set_yticks([0, 1, 2, 3])
    ax.set_ylabel('Number of observations')
    ax.legend()

    # Index of the last interval in which each subject was under observation.
    last_period = np.floor((df.time - 0.01) / interval_length).astype(int)
    # death[i, j] = 1 if subject i died in interval j
    death = np.zeros((n_patients, n_intervals))
    death[patients, last_period] = df.event
    # exposure[i, j] = time subject i was at risk in interval j.
    # The ufunc runs on the raw ndarray because ufunc.outer is not supported
    # on a pandas Series in recent pandas. The array is float so partial
    # exposures are not truncated by the assignment below.
    exposure = (np.greater_equal.outer(df.time.values, interval_bounds[:-1])
                * float(interval_length))
    exposure[patients, last_period] = df.time - interval_bounds[last_period]

    # Sampler settings
    SEED = 5078864
    n_samples = 1000
    n_tune = 1000

    # Model: lambda(t) = lambda0(t) * exp(X * beta)
    with pm.Model() as model:
        # Vague Gamma prior on the baseline hazard of each interval.
        lambda0 = pm.Gamma('lambda0', 0.01, 0.01, shape=n_intervals)
        # Regression coefficient for the metastization covariate.
        beta = pm.Normal('beta', 0, sd=1000)
        # Hazard per subject (rows) and interval (columns), kept in the trace.
        lambda_ = pm.Deterministic(
            'lambda_', T.outer(T.exp(beta * df.metastized), lambda0))
        # Expected deaths = hazard * exposure
        mu = pm.Deterministic('mu', exposure * lambda_)
        pm.Poisson('obs', mu, observed=death)

    with model:
        trace = pm.sample(n_samples, tune=n_tune, random_seed=SEED)

    pm.traceplot(trace)
    pm.plot_posterior(trace, varnames=['beta'], color='#87ceeb')
    pm.autocorrplot(trace, varnames=['beta'])

    # Hazards per draw and interval, shape (samples, n_intervals).
    base_hazard = trace['lambda0']
    met_hazard = trace['lambda0'] * np.exp(np.atleast_2d(trace['beta']).T)

    def cum_hazard(hazard):
        """Cumulative hazard along the interval axis."""
        return (interval_length * hazard).cumsum(axis=-1)

    def survival(hazard):
        """Survival function exp(-cumulative hazard)."""
        return np.exp(-cum_hazard(hazard))

    def plot_with_hpd(x, hazard, f, ax, color=None, label=None, alpha=0.05):
        """Plot f of the mean hazard as a step line with a shaded band.

        The band spans the alpha/2 and 1 - alpha/2 percentiles of
        f(hazard) across draws.
        """
        mean = f(hazard.mean(axis=0))
        percentiles = 100 * np.array([alpha / 2., 1. - alpha / 2.])
        hpd = np.percentile(f(hazard), percentiles, axis=0)
        ax.fill_between(x, hpd[0], hpd[1], color=color, alpha=0.25)
        ax.step(x, mean, color=color, label=label)

    # Cumulative hazard (left) and survival (right) for the first model.
    fig, (hazard_ax, surv_ax) = plt.subplots(ncols=2, sharex=True,
                                             sharey=False, figsize=(16, 6))
    plot_with_hpd(interval_bounds[:-1], base_hazard, cum_hazard,
                  hazard_ax, color=blue, label='Had not metastized')
    plot_with_hpd(interval_bounds[:-1], met_hazard, cum_hazard,
                  hazard_ax, color=red, label='Metastized')
    hazard_ax.set_xlim(0, df.time.max())
    hazard_ax.set_xlabel('Months since mastectomy')
    hazard_ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    hazard_ax.legend(loc=2)
    plot_with_hpd(interval_bounds[:-1], base_hazard, survival,
                  surv_ax, color=blue)
    plot_with_hpd(interval_bounds[:-1], met_hazard, survival,
                  surv_ax, color=red)
    surv_ax.set_xlim(0, df.time.max())
    surv_ax.set_xlabel('Months since mastectomy')
    surv_ax.set_ylabel('Survival function $S(t)$')
    fig.suptitle('Bayesian survival model')

    # Time-varying effects: beta becomes a Gaussian random walk over the
    # intervals so the coefficient can change over time.
    with pm.Model() as time_varying_model:
        lambda0 = pm.Gamma('lambda0', 0.01, 0.01, shape=n_intervals)
        beta = GaussianRandomWalk('beta', tau=1., shape=n_intervals)
        lambda_ = pm.Deterministic(
            'h', lambda0 * T.exp(T.outer(T.constant(df.metastized), beta)))
        mu = pm.Deterministic('mu', exposure * lambda_)
        pm.Poisson('obs', mu, observed=death)

    with time_varying_model:
        time_varying_trace = pm.sample(n_samples, tune=n_tune,
                                       random_seed=SEED)

    pm.traceplot(time_varying_trace)
    pm.plot_posterior(time_varying_trace, varnames=['beta'], color='#87ceeb')
    pm.forestplot(time_varying_trace, varnames=['beta'])

    # Mean of beta per interval with its 95% percentile band.
    fig, ax = plt.subplots(figsize=(8, 6))
    beta_hpd = np.percentile(time_varying_trace['beta'], [2.5, 97.5], axis=0)
    beta_low = beta_hpd[0]
    beta_high = beta_hpd[1]
    ax.fill_between(interval_bounds[:-1], beta_low, beta_high,
                    color=blue, alpha=0.25)
    beta_hat = time_varying_trace['beta'].mean(axis=0)
    ax.step(interval_bounds[:-1], beta_hat, color=blue)

    # Mark the last interval of each metastized subject, split by outcome.
    died_met = last_period[(df.event.values == 1) & (df.metastized == 1)]
    censored_met = last_period[(df.event.values == 0) & (df.metastized == 1)]
    ax.scatter(interval_bounds[died_met], beta_hat[died_met],
               c=red, zorder=10, label='Died, cancer metastized')
    ax.scatter(interval_bounds[censored_met], beta_hat[censored_met],
               c=blue, zorder=10, label='Censored, cancer metastized')
    ax.set_xlim(0, df.time.max())
    ax.set_xlabel('Months since mastectomy')
    ax.set_ylabel(r'$\beta_j$')
    ax.legend()

    # Hazards under the time-varying model. beta already has one column per
    # interval, so no transpose is needed here.
    tv_base_hazard = time_varying_trace['lambda0']
    tv_met_hazard = time_varying_trace['lambda0'] * np.exp(
        np.atleast_2d(time_varying_trace['beta']))

    # Cumulative hazard with and without the time-varying effect.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.step(interval_bounds[:-1], cum_hazard(base_hazard.mean(axis=0)),
            color=blue, label='Had not metastized')
    ax.step(interval_bounds[:-1], cum_hazard(met_hazard.mean(axis=0)),
            color=red, label='Metastized')
    ax.step(interval_bounds[:-1], cum_hazard(tv_base_hazard.mean(axis=0)),
            color=blue, linestyle='--',
            label='Had not metastized (time varying effect)')
    ax.step(interval_bounds[:-1], cum_hazard(tv_met_hazard.mean(axis=0)),
            color=red, linestyle='--',
            label='Metastized (time varying effect)')
    ax.set_xlim(0, df.time.max() - 4)
    ax.set_xlabel('Months since mastectomy')
    ax.set_ylim(0, 2)
    ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    ax.legend(loc=2)

    # Cumulative hazard and survival with bands for the time-varying model.
    fig, (hazard_ax, surv_ax) = plt.subplots(ncols=2, sharex=True,
                                             sharey=False, figsize=(16, 6))
    plot_with_hpd(interval_bounds[:-1], tv_base_hazard, cum_hazard,
                  hazard_ax, color=blue, label='Had not metastized')
    plot_with_hpd(interval_bounds[:-1], tv_met_hazard, cum_hazard,
                  hazard_ax, color=red, label='Metastized')
    hazard_ax.set_xlim(0, df.time.max())
    hazard_ax.set_xlabel('Months since mastectomy')
    hazard_ax.set_ylim(0, 2)
    hazard_ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    hazard_ax.legend(loc=2)
    plot_with_hpd(interval_bounds[:-1], tv_base_hazard, survival,
                  surv_ax, color=blue)
    plot_with_hpd(interval_bounds[:-1], tv_met_hazard, survival,
                  surv_ax, color=red)
    surv_ax.set_xlim(0, df.time.max())
    surv_ax.set_xlabel('Months since mastectomy')
    surv_ax.set_ylabel('Survival function $S(t)$')
    fig.suptitle('Bayesian survival model with time varying effects')

    plt.show()
def bayes_randomwalk():
    """Fit a random-walk regression of GLD on GDX and save two figures.

    Downloads closing prices for GLD and GDX, then models
    ``GLD = alpha + beta * GDX`` where ``alpha`` and ``beta`` follow Gaussian
    random walks held constant over 50 observations at a time. It saves the
    coefficient paths to ``PATH + 'bayes8.png'`` and the scatter plot with
    one regression line per segment to ``PATH + 'bayes9.png'``.

    Reads ``PATH`` and ``mpl_dates`` from module scope. Returns nothing.

    NOTE: originally marked as not compatible with Python 3. The integer
    divisions below are now explicit, but ``Model.TransformedVar`` belongs
    to an old PyMC3 API, so verify the installed version before relying on
    this function.
    """
    data = pd.DataFrame()
    symbols = ['GLD', 'GDX']
    # NOTE(review): the 'google' data source may no longer be served by
    # pandas-datareader. Confirm before use.
    for sym in symbols:
        data[sym] = web.DataReader(sym, data_source='google')['Close']

    model_randomwalk = pm.Model()
    with model_randomwalk:
        # std of random walk best sampled in log space
        sigma_alpha, log_sigma_alpha = \
            model_randomwalk.TransformedVar(
                'sigma_alpha',
                pm.Exponential.dist(1. / .02, testval=.1),
                pm.logtransform)
        sigma_beta, log_sigma_beta = \
            model_randomwalk.TransformedVar(
                'sigma_beta',
                pm.Exponential.dist(1. / .02, testval=.1),
                pm.logtransform)

    # to make the model more simple, we will apply the same coefficients
    # to 50 data points at a time
    subsample_alpha = 50
    subsample_beta = 50

    with model_randomwalk:
        # Floor division keeps the shape an integer under Python 3 too.
        alpha = GaussianRandomWalk('alpha', sigma_alpha**-2,
                                   shape=len(data) // subsample_alpha)
        beta = GaussianRandomWalk('beta', sigma_beta**-2,
                                  shape=len(data) // subsample_beta)

        # make coefficients have the same length as prices
        alpha_r = np.repeat(alpha, subsample_alpha)
        beta_r = np.repeat(beta, subsample_beta)

    print(len(data.dropna().GDX.values))  # a bit longer than 1,950

    with model_randomwalk:
        # define regression; the hard-coded 1950 = 39 segments * 50 rows
        regression = alpha_r + beta_r * data.GDX.values[:1950]

        # assume prices are normally distributed,
        # the mean comes from the regression
        sd = pm.Uniform('sd', 0, 20)
        pm.Normal('GLD', mu=regression, sd=sd,
                  observed=data.GLD.values[:1950])

    with model_randomwalk:
        # first optimize random walk
        start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)

        # sampling
        step = pm.NUTS(scaling=start)
        trace_rw = pm.sample(100, step, start=start, progressbar=False)

    print(np.shape(trace_rw['alpha']))

    # NOTE(review): mpl_dates is not defined in this function. Presumably a
    # module-level array of matplotlib date numbers for `data.index`; confirm.
    part_dates = np.linspace(min(mpl_dates), max(mpl_dates), 39)

    fig, ax1 = plt.subplots(figsize=(10, 5))
    plt.plot(part_dates, np.mean(trace_rw['alpha'], axis=0),
             'b', lw=2.5, label='alpha')
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw['alpha'][i], 'b-.', lw=0.75)
    plt.xlabel('date')
    plt.ylabel('alpha')
    plt.axis('tight')
    plt.grid(True)
    plt.legend(loc=2)
    ax1.xaxis.set_major_formatter(mpl.dates.DateFormatter('%d %b %y'))

    # Second y-axis: makes the twin axes current so the beta paths land on it.
    ax1.twinx()
    plt.plot(part_dates, np.mean(trace_rw['beta'], axis=0),
             'r', lw=2.5, label='beta')
    for i in range(45, 55):
        plt.plot(part_dates, trace_rw['beta'][i], 'r-.', lw=0.75)
    plt.ylabel('beta')
    plt.legend(loc=4)
    fig.autofmt_xdate()
    plt.savefig(PATH + 'bayes8.png', dpi=300)
    plt.close()

    plt.figure(figsize=(10, 5))
    plt.scatter(data['GDX'], data['GLD'], c=mpl_dates, marker='o')
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter('%d %b %y'))
    plt.grid(True)
    plt.xlabel('GDX')
    plt.ylabel('GLD')
    x = np.linspace(min(data['GDX']), max(data['GDX']))
    for i in range(39):
        alpha_rw = np.mean(trace_rw['alpha'].T[i])
        beta_rw = np.mean(trace_rw['beta'].T[i])
        # Integer index into the colormap lookup table. A float here would be
        # read as a [0, 1] fraction and clip every line to the same colour.
        plt.plot(x, alpha_rw + beta_rw * x,
                 color=plt.cm.jet(256 * i // 39))
    plt.savefig(PATH + 'bayes9.png', dpi=300)
    plt.close()