# Assumed imports for this snippet: pymc3 plus its math helpers
# (sigmoid, dot, log and exp all live in pymc3.math).
import pymc3
from pymc3.math import dot, exp, log, sigmoid


def fit(self, X, B, T):
    n, k = X.shape
    with pymc3.Model() as m:
        # Weak prior for the regression coefficients
        beta_sd = pymc3.Exponential('beta_sd', 1.0)
        # Regression coefficients
        beta = pymc3.Normal('beta', mu=0, sd=beta_sd, shape=(k,))
        # Conversion rates for each example
        c = sigmoid(dot(X, beta))
        # Note: `k` is reused here for the Weibull shape parameter; the
        # array width above is no longer needed once `beta` is defined.
        k = pymc3.Lognormal('k', mu=0, sd=1.0)  # Weak prior around k=1
        lambd = pymc3.Exponential('lambd', 0.1)  # Weak prior

        # PDF of Weibull: k * lambda * (t * lambda)^(k-1) * exp(-(t * lambda)^k)
        LL_observed = (log(c) + log(k) + log(lambd)
                       + (k - 1) * (log(T) + log(lambd)) - (T * lambd)**k)
        # CDF of Weibull: 1 - exp(-(t * lambda)^k)
        LL_censored = log((1 - c) + c * exp(-(T * lambd)**k))

        # Implement the likelihood through pymc3.Potential (custom likelihood):
        # https://github.com/pymc-devs/pymc3/issues/826
        logp = B * LL_observed + (1 - B) * LL_censored
        logpvar = pymc3.Potential('logpvar', logp.sum())

        # pymc3.sample takes `draws`, not `n_simulations`
        self.trace = pymc3.sample(draws=500, tune=500,
                                  discard_tuned_samples=True, njobs=1)
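# A minimal usage sketch (hypothetical, not from the source): `fit` expects a
# feature matrix X, a 0/1 conversion indicator B, and times T (time of
# conversion when B == 1, censoring time when B == 0).
import types

import numpy as np

X = np.random.randn(100, 3)             # features
B = np.random.binomial(1, 0.3, 100)     # 1 = converted, 0 = censored
T = np.random.exponential(1.0, 100)     # conversion / censoring times
model = types.SimpleNamespace()         # stand-in for the owning class
fit(model, X, B, T)                     # populates model.trace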
# Assumed imports for this snippet, matching the metadPy source.
import numpy as np
from pymc3 import (Beta, Binomial, Deterministic, HalfNormal, Model,
                   Multinomial, Normal, math, sample)


def cumulative_normal(x):
    """Cumulative normal distribution (probit helper, as defined in metadPy)."""
    return 0.5 + 0.5 * math.erf(x / math.sqrt(2))


def hmetad_groupLevel(data: dict, sample_model: bool = True, **kwargs):
    """Compute hierarchical meta-d' at the group level.

    This is an internal function. The group level model must be called using
    :py:func:`metadPy.hierarchical.hmetad`.

    Parameters
    ----------
    data : dict
        Response data.
    sample_model : boolean
        If `False`, only the model is returned without sampling.
    **kwargs : keyword arguments
        All keyword arguments are passed to :py:func:`pymc3.sampling.sample`.

    Returns
    -------
    model : :py:class:`pymc3.Model` instance
        The pymc3 model. Encapsulates the variables and likelihood factors.
    trace : :py:class:`pymc3.backends.base.MultiTrace` or
        :py:class:`arviz.InferenceData`
        A `MultiTrace` or `ArviZ InferenceData` object that contains the
        samples.

    References
    ----------
    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation of
       metacognitive efficiency from confidence ratings, Neuroscience of
       Consciousness, 3(1) nix007, https://doi.org/10.1093/nc/nix007
    """
    nSubj = data["nSubj"]
    hits = data["hits"]
    falsealarms = data["falsealarms"]
    s = data["s"]
    n = data["n"]
    counts = data["counts"]
    nRatings = data["nRatings"]
    Tol = data["Tol"]
    cr = data["cr"]
    m = data["m"]

    with Model() as model:

        # Hyperpriors on d, c and c2
        mu_c1 = Normal(
            "mu_c1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        mu_c2 = Normal(
            "mu_c2", mu=0, tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        mu_d1 = Normal(
            "mu_d1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_c1 = HalfNormal(
            "sigma_c1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_c2 = HalfNormal(
            "sigma_c2", tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        sigma_d1 = HalfNormal(
            "sigma_d1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        # Type 1 priors
        c1_tilde = Normal("c1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        c1 = Deterministic("c1", mu_c1 + sigma_c1 * c1_tilde)

        d1_tilde = Normal("d1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        d1 = Deterministic("d1", mu_d1 + sigma_d1 * d1_tilde)

        # TYPE 1 SDT BINOMIAL MODEL
        h = cumulative_normal(d1 / 2 - c1)
        f = cumulative_normal(-d1 / 2 - c1)
        H = Binomial("H", n=s, p=h, observed=hits)
        FA = Binomial("FA", n=n, p=f, observed=falsealarms)

        # Hyperpriors on mRatio
        mu_logMratio = Normal(
            "mu_logMratio", mu=0, tau=1, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_delta = HalfNormal("sigma_delta", tau=1, shape=(1))

        delta_tilde = Normal("delta_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        delta = Deterministic("delta", sigma_delta * delta_tilde)

        epsilon_logMratio = Beta("epsilon_logMratio", 1, 1, shape=(1))
        logMratio = Deterministic(
            "logMratio", mu_logMratio + epsilon_logMratio * delta
        )
        mRatio = Deterministic("mRatio", math.exp(logMratio))

        # Type 2 priors
        meta_d = Deterministic("meta_d", mRatio * d1)

        # Specify ordered prior on criteria,
        # bounded above and below by Type 1 c1
        cS1_hn = Normal(
            "cS1_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(-1.5, -0.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS1 = Deterministic("cS1", -mu_c2 + (cS1_hn * sigma_c2))

        cS2_hn = Normal(
            "cS2_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(0.5, 1.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS2 = Deterministic("cS2", mu_c2 + (cS2_hn * sigma_c2))

        # Means of SDT distributions
        S2mu = meta_d / 2
        S1mu = -meta_d / 2

        # Calculate normalisation constants
        C_area_rS1 = cumulative_normal(c1 - S1mu)
        I_area_rS1 = cumulative_normal(c1 - S2mu)
        C_area_rS2 = 1 - cumulative_normal(c1 - S2mu)
        I_area_rS2 = 1 - cumulative_normal(c1 - S1mu)

        # Get nC_rS1 probs
        nC_rS1 = cumulative_normal(cS1 - S1mu) / C_area_rS1
        nC_rS1 = Deterministic(
            "nC_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S1mu)
                        / C_area_rS1,
                        nC_rS1[:, 1:] - nC_rS1[:, :-1],
                        (
                            (
                                cumulative_normal(c1 - S1mu)
                                - cumulative_normal(
                                    cS1[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                                )
                            )
                            / C_area_rS1
                        ),
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS2 probs
        nI_rS2 = (1 - cumulative_normal(cS2 - S1mu)) / I_area_rS2
        nI_rS2 = Deterministic(
            "nI_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S1mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S1mu
                                )
                            )
                        )
                        / I_area_rS2,
                        nI_rS2[:, :-1]
                        - (1 - cumulative_normal(cS2[:, 1:] - S1mu)) / I_area_rS2,
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                            )
                        )
                        / I_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS1 probs
        nI_rS1 = (-cumulative_normal(cS1 - S2mu)) / I_area_rS1
        nI_rS1 = Deterministic(
            "nI_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S2mu)
                        / I_area_rS1,
                        nI_rS1[:, :-1]
                        + (cumulative_normal(cS1[:, 1:] - S2mu)) / I_area_rS1,
                        (
                            cumulative_normal(c1 - S2mu)
                            - cumulative_normal(
                                cS1[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / I_area_rS1,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nC_rS2 probs
        nC_rS2 = (1 - cumulative_normal(cS2 - S2mu)) / C_area_rS2
        nC_rS2 = Deterministic(
            "nC_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S2mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S2mu
                                )
                            )
                        )
                        / C_area_rS2,
                        nC_rS2[:, :-1]
                        - ((1 - cumulative_normal(cS2[:, 1:] - S2mu)) / C_area_rS2),
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / C_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Avoid underflow of probabilities
        nC_rS1 = math.switch(nC_rS1 < Tol, Tol, nC_rS1)
        nI_rS2 = math.switch(nI_rS2 < Tol, Tol, nI_rS2)
        nI_rS1 = math.switch(nI_rS1 < Tol, Tol, nI_rS1)
        nC_rS2 = math.switch(nC_rS2 < Tol, Tol, nC_rS2)

        # TYPE 2 SDT MODEL (META-D)
        # Multinomial likelihood for response counts ordered as c(nR_S1, nR_S2)
        Multinomial(
            "CR_counts",
            cr,
            nC_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, :nRatings],
        )
        Multinomial(
            "FA_counts",
            FA,
            nI_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings : nRatings * 2],
        )
        Multinomial(
            "M_counts",
            m,
            nI_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 2 : nRatings * 3],
        )
        Multinomial(
            "H_counts",
            H,
            nC_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 3 : nRatings * 4],
        )

        if sample_model is True:
            trace = sample(return_inferencedata=True, **kwargs)
            return model, trace
        else:
            return model
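# A minimal usage sketch (hypothetical values, not from the source): the keys
# mirror what hmetad_groupLevel reads from `data` above; in metadPy this dict
# is normally assembled by the public hmetad() wrapper. The counts row is
# constructed so each block of nRatings columns sums to the matching total
# (cr, falsealarms, m, hits).
nSubj, nRatings = 2, 4
row = [50, 20, 8, 2, 10, 6, 3, 1, 10, 6, 3, 1, 2, 8, 20, 50]
data = {
    "nSubj": nSubj,
    "nRatings": nRatings,
    "hits": np.full((nSubj, 1), 80),         # Type 1 hits
    "falsealarms": np.full((nSubj, 1), 20),  # Type 1 false alarms
    "s": np.full((nSubj, 1), 100),           # signal trials (hits + misses)
    "n": np.full((nSubj, 1), 100),           # noise trials (FAs + CRs)
    "cr": np.full((nSubj, 1), 80),           # correct rejections
    "m": np.full((nSubj, 1), 20),            # misses
    "counts": np.array([row, row]),          # [CR | FA | M | H] rating counts
    "Tol": 1e-05,
}
model = hmetad_groupLevel(data, sample_model=False)  # build only, no sampling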
# define the model
# \sig ~ exp(50)
#     why? stdev of returns is approx 0.02, and stdev of exp(lam=50) = 1/50 = 0.02
# \nu ~ exp(0.1)
#     the degrees of freedom of the Student-t (controls tail heaviness);
#     mean of exp(lam=0.1) = 10
# s_i ~ normal(s_i-1, \sig^-2)   (second argument is the precision tau)
# log(y_i) ~ studentT(\nu, 0, exp(-2 s_i))
#
# Assumed imports for this snippet; `returns` is the series of returns
# being modelled (e.g. S&P 500 log returns).
import matplotlib.pyplot as plt
from pymc3 import (Deterministic, Exponential, Model, StudentT, sample,
                   traceplot)
from pymc3.distributions.timeseries import GaussianRandomWalk
from pymc3.math import exp

with Model() as sp500_model:
    nu = Exponential('nu', 1. / 10, testval=5.)  # a rate of 1./50 gives similar results
    sigma = Exponential('sigma', 1. / .02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2 * s))
    r = StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# fit the model using NUTS
# NUTS is auto-assigned in sample() because every free variable here is
# continuous, and NUTS is pymc3's default step method for continuous variables.
# You may get a warning like:
#   WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456')
# Ignore it; sampling will move along anyway.
with sp500_model:
    trace = sample(2000, progressbar=False)

# plot results from model fitting; the first 200 samples are dropped as
# burn-in so the plots show only the (roughly) converged part of the chain
traceplot(trace[200:], [nu, sigma])

# plot the results: volatility inferred by the model
fig, ax = plt.subplots()  # figsize=(15, 8)
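# A plausible completion of the volatility plot (assumed, not from the
# source): overlay posterior draws of the instantaneous standard deviation
# sqrt(volatility_process) = exp(-s) on the observed returns.
import numpy as np

ax.plot(returns, color='C0', alpha=.5)
ax.plot(np.sqrt(trace['volatility_process'][::10].T), 'C3', alpha=.03)
ax.set(title='Inferred volatility', xlabel='time', ylabel='returns')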
# Assumed imports for this snippet; the waveform/TDI helpers y_fd, XYZ_freq
# and AET_XYZ are defined elsewhere in the source.
import numpy as np
from numpy import ones, pi, zeros
from numpy.random import randn
import pymc3 as pm
import pymc3.math as pmm
import theano.tensor as tt


def make_model(A_re_data, A_im_data, E_re_data, E_im_data, Tobs, f0, fdot,
               fddot, sigma, hbin, lnAlow, lnAhigh, N, start_pt={}):
    f0_mean = f0
    fdot_mean = fdot
    fddot_mean = fddot

    with pm.Model() as model:
        _ = pm.Data('sigma', sigma)
        _ = pm.Data('hbin', hbin)
        _ = pm.Data('Tobs', Tobs)
        _ = pm.Data('N', N)

        A_re_data = pm.Data('A_re_data', A_re_data)
        A_im_data = pm.Data('A_im_data', A_im_data)
        E_re_data = pm.Data('E_re_data', E_re_data)
        E_im_data = pm.Data('E_im_data', E_im_data)

        n_phi = pm.Normal('n_phi', mu=zeros(2), sigma=ones(2), shape=(2,),
                          testval=start_pt.get('n_phi', randn(2)))
        phi0 = pm.Deterministic('phi0', tt.arctan2(n_phi[1], n_phi[0]))

        dphi_f0 = pm.Normal('dphi_f0', mu=0, sigma=pi, testval=0)
        dphi_fdot = pm.Normal('dphi_fdot', mu=0, sigma=pi, testval=0)
        dphi_fddot = pm.Normal('dphi_fddot', mu=0, sigma=pi, testval=0)

        f0 = pm.Deterministic('f0', f0_mean + dphi_f0 / (2 * pi * Tobs))
        fdot = pm.Deterministic(
            'fdot', fdot_mean + dphi_fdot / (pi * Tobs * Tobs))
        fddot = pm.Deterministic(
            'fddot', fddot_mean + 3.0 * dphi_fddot / (pi * Tobs * Tobs * Tobs))

        cos_iota = pm.Uniform('cos_iota', lower=-1, upper=1,
                              testval=start_pt.get(
                                  'cos_iota',
                                  np.random.uniform(low=-1, high=1)))
        iota = pm.Deterministic('iota', tt.arccos(cos_iota))

        # This 2-vector gives 2*psi
        n_2psi = pm.Normal('n_2psi', mu=zeros(2), sigma=ones(2), shape=(2,),
                           testval=start_pt.get('n_2psi', randn(2)))
        psi = pm.Deterministic('psi', tt.arctan2(n_2psi[1], n_2psi[0]) / 2)

        n_ra_dec = pm.Normal('n_ra_dec', mu=zeros(3), sigma=ones(3),
                             shape=(3,),
                             testval=start_pt.get('nhat', randn(3)))
        nhat = pm.Deterministic(
            'nhat',
            n_ra_dec / pmm.sqrt(tt.tensordot(n_ra_dec, n_ra_dec, axes=1)))
        _ = pm.Deterministic('phi', tt.arctan2(n_ra_dec[1], n_ra_dec[0]))
        _ = pm.Deterministic('theta', tt.arccos(nhat[2]))

        lnA = pm.Uniform('lnA', lower=lnAlow, upper=lnAhigh,
                         testval=start_pt.get(
                             'lnA',
                             np.random.uniform(low=lnAlow, high=lnAhigh)))
        A = pm.Deterministic('A', pmm.exp(lnA))

        y_re, y_im = y_fd(Tobs, f0, fdot, fddot, phi0, nhat, cos_iota, psi,
                          hbin, N)
        ((X_re, X_im), (Y_re, Y_im), (Z_re, Z_im)) = XYZ_freq(
            y_re, y_im, Tobs, hbin, N)
        ((A_re, A_im), (E_re, E_im), (T_re, T_im)) = AET_XYZ(
            X_re, X_im, Y_re, Y_im, Z_re, Z_im)

        A_re = pm.Deterministic('A_re', A * A_re)
        A_im = pm.Deterministic('A_im', A * A_im)
        E_re = pm.Deterministic('E_re', A * E_re)
        E_im = pm.Deterministic('E_im', A * E_im)

        snr = pm.Deterministic(
            'SNR',
            tt.sqrt(
                tt.sum(tt.square(A_re / sigma)) +
                tt.sum(tt.square(A_im / sigma)) +
                tt.sum(tt.square(E_re / sigma)) +
                tt.sum(tt.square(E_im / sigma))))

        _ = pm.Normal('A_re_obs', mu=A_re, sigma=sigma, observed=A_re_data)
        _ = pm.Normal('A_im_obs', mu=A_im, sigma=sigma, observed=A_im_data)
        _ = pm.Normal('E_re_obs', mu=E_re, sigma=sigma, observed=E_re_data)
        _ = pm.Normal('E_im_obs', mu=E_im, sigma=sigma, observed=E_im_data)

    return model
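# A minimal usage sketch (hypothetical shapes and values, not from the
# source): build the model from frequency-domain A/E channel data and draw
# samples. N is the number of frequency bins per data array, and hbin is
# taken here as the heterodyne bin nearest f0; y_fd, XYZ_freq and AET_XYZ
# must be importable for this to run.
N = 128
Tobs = 3.15e7                   # roughly one year of observation, in seconds
noise = 1e-22
model = make_model(
    A_re_data=noise * randn(N), A_im_data=noise * randn(N),
    E_re_data=noise * randn(N), E_im_data=noise * randn(N),
    Tobs=Tobs, f0=1e-3, fdot=1e-17, fddot=0.0, sigma=noise,
    hbin=int(1e-3 * Tobs), lnAlow=np.log(1e-23), lnAhigh=np.log(1e-21), N=N)
with model:
    trace = pm.sample(draws=500, tune=500)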
# Assumed imports for this snippet; `fc` is the project's own helper module
# (get_time_series, get_stationarity_statistics, generate_proj_returns,
# get_sharpe_ratio), assumed importable alongside this script.
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import scipy.optimize
import pymc3 as pm
from pymc3.math import exp


def main(tickers=['AAPL'], n_steps=21):
    """ Main entry point of the app """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker)[-500:]

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        data[ticker]['adj_close'].mean(),
                                        data[ticker]['adj_close'].median(),
                                        data[ticker]['adj_close'].max(),
                                        data[ticker]['adj_close'].min(),
                                        data[ticker]['adj_close'].var(),
                                        data[ticker]['adj_close'].std(),
                                        data[ticker]['adj_close'].skew(),
                                        data[ticker]['adj_close'].kurtosis()))

        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))
        data[ticker]['log_returns'].dropna(inplace=True)

        (adfstat, pvalue, critvalues, resstore, dagostino_results,
         shapiro_results, ks_results, anderson_results,
         kpss_results) = fc.get_stationarity_statistics(
             data[ticker]['log_returns'].values)

        print("{} Stationarity Statistics\n"
              "-------------\n"
              "Augmented Dickey-Fuller unit root test: {}\n"
              "MacKinnon’s approximate p-value: {}\n"
              "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
              "D’Agostino and Pearson’s normality test: {}\n"
              "Shapiro-Wilk normality test: {}\n"
              "Kolmogorov-Smirnov goodness of fit test: {}\n"
              "Anderson-Darling test: {}\n"
              "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
              .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                      shapiro_results, ks_results, anderson_results,
                      kpss_results))

        train, test = np.arange(0, 450), np.arange(
            451, len(data[ticker]['log_returns']))
        n = len(train)

        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)
            nu = pm.Exponential('nu', 1. / 10)
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)
            # lam is the precision (inverse variance) in pymc3, not the sd
            # used by scipy
            r = pm.StudentT('r', nu, mu=mu, lam=1 / exp(-2 * logs),
                            observed=data[ticker]['log_returns'].values[train])

        with model:
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_powell)

        with model:
            # `start` belongs to pm.sample, not to the Metropolis step method
            step = pm.Metropolis(vars=[logs, mu, nu, sigma])
            start2 = pm.sample(100, step, start=start)[-1]

            step = pm.Metropolis(vars=[logs, mu, nu, sigma])
            trace = pm.sample(2000, step, start=start2)

        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        pred_results = pd.DataFrame(
            data=dict(original=data[ticker]['log_returns'][test],
                      prediction=pred_data[ticker][1, :]),
            index=data[ticker]['log_returns'][test].index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(data[ticker]['log_returns'].values, color='blue')
        ax.plot(1 + len(train) + np.arange(0, len(test)),
                pred_data[ticker][1, :], color='red')
        ax.set(
            title='{} Metropolis In-Sample Returns Prediction'.format(ticker),
            xlabel='time', ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Metropolis-In-Sample-Returns-Prediction.png'.format(
                ticker))

        # out-of-sample test
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test) + n_steps)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(
            title='{} Day {} Metropolis Out-of-Sample Returns Forecast'.format(
                n_steps, ticker),
            xlabel='time', ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Metropolis-Out-of-Sample-Returns-Forecast.png'.
            format(n_steps, ticker))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')

    return forecast_data
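# A minimal entry point sketch (assumed, not from the source): run the
# forecast for one or more tickers and keep the out-of-sample paths.
if __name__ == '__main__':
    forecasts = main(tickers=['AAPL'], n_steps=21)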