Example #1
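# Assumed imports for this snippet (the original shows only the method body):
import pymc3
from pymc3.math import dot, exp, log, sigmoid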
    def fit(self, X, B, T):
        n, k = X.shape
        with pymc3.Model() as m:
            beta_sd = pymc3.Exponential(
                'beta_sd', 1.0)  # Weak prior for the regression coefficients
            beta = pymc3.Normal('beta', mu=0, sd=beta_sd,
                                shape=(k, ))  # Regression coefficients
            c = sigmoid(dot(X, beta))  # Conversion rates for each example
            # NB: this reassigns k; the feature count from X.shape is not needed again
            k = pymc3.Lognormal('k', mu=0, sd=1.0)  # Weak prior around k=1
            lambd = pymc3.Exponential('lambd', 0.1)  # Weak prior

            # PDF of Weibull: k * lambda * (t * lambda)^(k-1) * exp(-(t * lambda)^k)
            LL_observed = log(c) + log(k) + log(
                lambd) + (k - 1) * (log(T) + log(lambd)) - (T * lambd)**k
            # CDF of Weibull: 1 - exp(-(t * lambda)^k)
            LL_censored = log((1 - c) + c * exp(-(T * lambd)**k))

            # We need to implement the likelihood using pymc3.Potential (custom likelihood)
            # https://github.com/pymc-devs/pymc3/issues/826
            logp = B * LL_observed + (1 - B) * LL_censored
            logpvar = pymc3.Potential('logpvar', logp.sum())

            self.trace = pymc3.sample(draws=500,
                                      tune=500,
                                      discard_tuned_samples=True,
                                      cores=1)
            print('done')
        print('done 2')
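A hedged usage sketch; `WeibullConversionModel` is a hypothetical name for the class this `fit` method belongs to, and the arrays are synthetic:

import numpy as np

model = WeibullConversionModel()               # hypothetical wrapper class
X = np.random.randn(100, 3)                    # feature matrix
B = np.random.randint(0, 2, size=100)          # 1 = conversion observed, 0 = censored
T = np.random.exponential(1.0, size=100)       # observation times
model.fit(X, B, T)
print(model.trace['beta'].mean(axis=0))        # posterior mean of the coefficients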
Example #2
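# Assumed imports for this snippet; in metadPy, cumulative_normal is a small
# helper defined alongside this function, e.g.:
#     def cumulative_normal(x):
#         return 0.5 + 0.5 * math.erf(x / math.sqrt(2))
import numpy as np
from pymc3 import (Beta, Binomial, Deterministic, HalfNormal, Model,
                   Multinomial, Normal, math, sample)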
def hmetad_groupLevel(data: dict, sample_model: bool = True, **kwargs):
    """Compute hierachical meta-d' at the subject level.

    This is an internal function. The group level model must be
    called using :py:func:`metadPy.hierarchical.hmetad`.

    Parameters
    ----------
    data : dict
        Response data.
    sample_model : boolean
        If `False`, only the model is returned without sampling.
    **kwargs : keyword arguments
        All keyword arguments are passed to :py:func:`pymc3.sampling.sample`.

    Returns
    -------
    model : :py:class:`pymc3.Model` instance
        The pymc3 model. Encapsulates the variables and likelihood factors.
    trace : :py:class:`pymc3.backends.base.MultiTrace` or
        :py:class:`arviz.InferenceData`
        A `MultiTrace` or `ArviZ InferenceData` object that contains the
        samples.

    References
    ----------
    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation
       of metacognitive efficiency from confidence ratings, Neuroscience of
       Consciousness, 3(1) nix007, https://doi.org/10.1093/nc/nix007
    """
    nSubj = data["nSubj"]
    hits = data["hits"]
    falsealarms = data["falsealarms"]
    s = data["s"]
    n = data["n"]
    counts = data["counts"]
    nRatings = data["nRatings"]
    Tol = data["Tol"]
    cr = data["cr"]
    m = data["m"]

    with Model() as model:

        # hyperpriors on d, c and c2
        mu_c1 = Normal(
            "mu_c1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        mu_c2 = Normal(
            "mu_c2", mu=0, tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        mu_d1 = Normal(
            "mu_d1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        sigma_c1 = HalfNormal(
            "sigma_c1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_c2 = HalfNormal(
            "sigma_c2", tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        sigma_d1 = HalfNormal(
            "sigma_d1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        # Type 1 priors
        c1_tilde = Normal("c1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        c1 = Deterministic("c1", mu_c1 + sigma_c1 * c1_tilde)

        d1_tilde = Normal("d1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        d1 = Deterministic("d1", mu_d1 + sigma_d1 * d1_tilde)

        # TYPE 1 SDT BINOMIAL MODEL
        h = cumulative_normal(d1 / 2 - c1)
        f = cumulative_normal(-d1 / 2 - c1)
        H = Binomial("H", n=s, p=h, observed=hits)
        FA = Binomial("FA", n=n, p=f, observed=falsealarms)

        # Hyperpriors on mRatio
        mu_logMratio = Normal(
            "mu_logMratio", mu=0, tau=1, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_delta = HalfNormal("sigma_delta", tau=1, shape=(1))

        delta_tilde = Normal("delta_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        delta = Deterministic("delta", sigma_delta * delta_tilde)

        epsilon_logMratio = Beta("epsilon_logMratio", 1, 1, shape=(1))
        logMratio = Deterministic("logMratio", mu_logMratio + epsilon_logMratio * delta)
        mRatio = Deterministic("mRatio", math.exp(logMratio))

        # Type 2 priors
        meta_d = Deterministic("meta_d", mRatio * d1)

        # Specify ordered prior on criteria
        # bounded above and below by Type 1 c1
        cS1_hn = Normal(
            "cS1_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(-1.5, -0.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS1 = Deterministic("cS1", -mu_c2 + (cS1_hn * sigma_c2))

        cS2_hn = Normal(
            "cS2_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(0.5, 1.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS2 = Deterministic("cS2", mu_c2 + (cS2_hn * sigma_c2))

        # Means of SDT distributions
        S2mu = meta_d / 2
        S1mu = -meta_d / 2

        # Calculate normalisation constants
        C_area_rS1 = cumulative_normal(c1 - S1mu)
        I_area_rS1 = cumulative_normal(c1 - S2mu)
        C_area_rS2 = 1 - cumulative_normal(c1 - S2mu)
        I_area_rS2 = 1 - cumulative_normal(c1 - S1mu)

        # Get nC_rS1 probs
        nC_rS1 = cumulative_normal(cS1 - S1mu) / C_area_rS1
        nC_rS1 = Deterministic(
            "nC_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S1mu)
                        / C_area_rS1,
                        nC_rS1[:, 1:] - nC_rS1[:, :-1],
                        (
                            (
                                cumulative_normal(c1 - S1mu)
                                - cumulative_normal(
                                    cS1[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                                )
                            )
                            / C_area_rS1
                        ),
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS2 probs
        nI_rS2 = (1 - cumulative_normal(cS2 - S1mu)) / I_area_rS2
        nI_rS2 = Deterministic(
            "nI_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S1mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S1mu
                                )
                            )
                        )
                        / I_area_rS2,
                        nI_rS2[:, :-1]
                        - (1 - cumulative_normal(cS2[:, 1:] - S1mu)) / I_area_rS2,
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                            )
                        )
                        / I_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS1 probs
        nI_rS1 = (-cumulative_normal(cS1 - S2mu)) / I_area_rS1
        nI_rS1 = Deterministic(
            "nI_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S2mu)
                        / I_area_rS1,
                        nI_rS1[:, :-1]
                        + (cumulative_normal(cS1[:, 1:] - S2mu)) / I_area_rS1,
                        (
                            cumulative_normal(c1 - S2mu)
                            - cumulative_normal(
                                cS1[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / I_area_rS1,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nC_rS2 probs
        nC_rS2 = (1 - cumulative_normal(cS2 - S2mu)) / C_area_rS2
        nC_rS2 = Deterministic(
            "nC_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S2mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S2mu
                                )
                            )
                        )
                        / C_area_rS2,
                        nC_rS2[:, :-1]
                        - ((1 - cumulative_normal(cS2[:, 1:] - S2mu)) / C_area_rS2),
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / C_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Avoid underflow of probabilities
        nC_rS1 = math.switch(nC_rS1 < Tol, Tol, nC_rS1)
        nI_rS2 = math.switch(nI_rS2 < Tol, Tol, nI_rS2)
        nI_rS1 = math.switch(nI_rS1 < Tol, Tol, nI_rS1)
        nC_rS2 = math.switch(nC_rS2 < Tol, Tol, nC_rS2)

        # TYPE 2 SDT MODEL (META-D)
        # Multinomial likelihood for response counts ordered as c(nR_S1,nR_S2)
        Multinomial(
            "CR_counts",
            cr,
            nC_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, :nRatings],
        )
        Multinomial(
            "FA_counts",
            FA,
            nI_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings : nRatings * 2],
        )
        Multinomial(
            "M_counts",
            m,
            nI_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 2 : nRatings * 3],
        )
        Multinomial(
            "H_counts",
            H,
            nC_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 3 : nRatings * 4],
        )

        if sample_model is True:

            trace = sample(return_inferencedata=True, **kwargs)

            return model, trace

        else:
            return model
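A hedged usage sketch; `data` is assumed to already contain the keys read at the top of the function (nSubj, hits, falsealarms, s, n, counts, nRatings, Tol, cr, m), as normally assembled by `metadPy.hierarchical.hmetad`:

# Build and sample the group-level model; extra kwargs go to pymc3.sample
model, trace = hmetad_groupLevel(data, sample_model=True, chains=4, tune=1000)

# Or build the model without sampling
model = hmetad_groupLevel(data, sample_model=False)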
Example #3
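# Assumed imports/context for this snippet; `returns` is a series of
# daily S&P 500 log returns loaded beforehand.
import matplotlib.pyplot as plt
import numpy as np
from pymc3 import (Deterministic, Exponential, GaussianRandomWalk, Model,
                   StudentT, sample, traceplot)
from pymc3.math import exp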
# define the model
# sigma ~ Exponential(lam=50)
#       why? the stdev of returns is approx 0.02,
#       and Exponential(lam=50) has mean (and stdev) 1/50 = 0.02
# nu ~ Exponential(lam=0.1)
#       the degrees of freedom for the Student's t;
#       mean of Exponential(lam=0.1) = 10
# s_i ~ Normal(s_{i-1}, sigma^-2)
# log(y_i) ~ StudentT(nu, 0, exp(-2 * s_i))
with Model() as sp500_model:
    nu = Exponential('nu', 1. / 10,
                     testval=5.)  # a rate of 1/50 gives similar results
    sigma = Exponential('sigma', 1. / .02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2 * s))
    r = StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# fit the model using NUTS
# NUTS is auto-assigned in sample() because all free variables are continuous
# you may see a warning like:
#   WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456')
# it is safe to ignore; sampling will proceed
with sp500_model:
    trace = sample(2000, progressbar=False)
# plot results from model fitting,
# discarding the first 200 samples as burn-in
traceplot(trace[200:], varnames=['nu', 'sigma'])

# plot the results: volatility inferred by the model
fig, ax = plt.subplots()  # optionally pass figsize=(15, 8)
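The example breaks off here; a plausible completion, following the standard pymc3 stochastic-volatility tutorial (with lam = 1/volatility_process, the implied return stdev is exp(-s)):

ax.plot(returns)
ax.plot(np.exp(-trace['s'][200:][::5].T), 'r', alpha=0.03)  # posterior volatility paths
ax.set(title='Inferred volatility', xlabel='time', ylabel='volatility')
ax.legend(['returns', 'stochastic volatility'])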
Example #4
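# Assumed imports for this snippet; y_fd, XYZ_freq and AET_XYZ are
# project-specific helpers assumed to be defined elsewhere.
import numpy as np
from numpy import ones, pi, zeros
from numpy.random import randn
import pymc3 as pm
import pymc3.math as pmm
import theano.tensor as tt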
def make_model(A_re_data,
               A_im_data,
               E_re_data,
               E_im_data,
               Tobs,
               f0,
               fdot,
               fddot,
               sigma,
               hbin,
               lnAlow,
               lnAhigh,
               N,
               start_pt=None):
    # avoid a mutable default argument; create a fresh dict per call
    if start_pt is None:
        start_pt = {}

    f0_mean = f0
    fdot_mean = fdot
    fddot_mean = fddot

    with pm.Model() as model:
        _ = pm.Data('sigma', sigma)
        _ = pm.Data('hbin', hbin)
        _ = pm.Data('Tobs', Tobs)
        _ = pm.Data('N', N)
        A_re_data = pm.Data('A_re_data', A_re_data)
        A_im_data = pm.Data('A_im_data', A_im_data)
        E_re_data = pm.Data('E_re_data', E_re_data)
        E_im_data = pm.Data('E_im_data', E_im_data)

        n_phi = pm.Normal('n_phi',
                          mu=zeros(2),
                          sigma=ones(2),
                          shape=(2, ),
                          testval=start_pt.get('n_phi', randn(2)))
        phi0 = pm.Deterministic('phi0', tt.arctan2(n_phi[1], n_phi[0]))

        dphi_f0 = pm.Normal('dphi_f0', mu=0, sigma=pi, testval=0)
        dphi_fdot = pm.Normal('dphi_fdot', mu=0, sigma=pi, testval=0)
        dphi_fddot = pm.Normal('dphi_fddot', mu=0, sigma=pi, testval=0)

        f0 = pm.Deterministic('f0', f0_mean + dphi_f0 / (2 * pi * Tobs))
        fdot = pm.Deterministic('fdot',
                                fdot_mean + dphi_fdot / (pi * Tobs * Tobs))
        fddot = pm.Deterministic(
            'fddot', fddot_mean + 3.0 * dphi_fddot / (pi * Tobs * Tobs * Tobs))

        cos_iota = pm.Uniform('cos_iota',
                              lower=-1,
                              upper=1,
                              testval=start_pt.get(
                                  'cos_iota', np.random.uniform(low=-1,
                                                                high=1)))
        iota = pm.Deterministic('iota', tt.arccos(cos_iota))

        # This 2-vector gives 2*psi
        n_2psi = pm.Normal('n_2psi',
                           mu=zeros(2),
                           sigma=ones(2),
                           shape=(2, ),
                           testval=start_pt.get('n_2psi', randn(2)))
        psi = pm.Deterministic('psi', tt.arctan2(n_2psi[1], n_2psi[0]) / 2)

        n_ra_dec = pm.Normal('n_ra_dec',
                             mu=zeros(3),
                             sigma=ones(3),
                             shape=(3, ),
                             testval=start_pt.get('nhat', randn(3)))
        nhat = pm.Deterministic(
            'nhat',
            n_ra_dec / pmm.sqrt(tt.tensordot(n_ra_dec, n_ra_dec, axes=1)))
        _ = pm.Deterministic('phi', tt.arctan2(n_ra_dec[1], n_ra_dec[0]))
        _ = pm.Deterministic('theta', tt.arccos(nhat[2]))

        lnA = pm.Uniform('lnA',
                         lower=lnAlow,
                         upper=lnAhigh,
                         testval=start_pt.get(
                             'lnA', np.random.uniform(low=lnAlow,
                                                      high=lnAhigh)))
        A = pm.Deterministic('A', pmm.exp(lnA))

        y_re, y_im = y_fd(Tobs, f0, fdot, fddot, phi0, nhat, cos_iota, psi,
                          hbin, N)
        ((X_re, X_im), (Y_re, Y_im),
         (Z_re, Z_im)) = XYZ_freq(y_re, y_im, Tobs, hbin, N)
        ((A_re, A_im), (E_re, E_im),
         (T_re, T_im)) = AET_XYZ(X_re, X_im, Y_re, Y_im, Z_re, Z_im)

        A_re = pm.Deterministic('A_re', A * A_re)
        A_im = pm.Deterministic('A_im', A * A_im)
        E_re = pm.Deterministic('E_re', A * E_re)
        E_im = pm.Deterministic('E_im', A * E_im)

        snr = pm.Deterministic(
            'SNR',
            tt.sqrt(
                tt.sum(tt.square(A_re / sigma)) +
                tt.sum(tt.square(A_im / sigma)) +
                tt.sum(tt.square(E_re / sigma)) +
                tt.sum(tt.square(E_im / sigma))))

        _ = pm.Normal('A_re_obs', mu=A_re, sigma=sigma, observed=A_re_data)
        _ = pm.Normal('A_im_obs', mu=A_im, sigma=sigma, observed=A_im_data)
        _ = pm.Normal('E_re_obs', mu=E_re, sigma=sigma, observed=E_re_data)
        _ = pm.Normal('E_im_obs', mu=E_im, sigma=sigma, observed=E_im_data)

    return model
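A hedged usage sketch; the data arrays and the Tobs, f0, fdot, fddot, sigma, hbin and N values come from the surrounding analysis, and the lnA bounds are illustrative:

model = make_model(A_re_data, A_im_data, E_re_data, E_im_data,
                   Tobs, f0, fdot, fddot, sigma, hbin,
                   lnAlow=-60.0, lnAhigh=-40.0, N=N)
with model:
    trace = pm.sample(draws=1000, tune=1000)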
Example #5
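# Assumed imports for this snippet; `fc` is a project-local helper module
# providing get_time_series, get_stationarity_statistics,
# generate_proj_returns and get_sharpe_ratio.
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp
import scipy.optimize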
def main(tickers=['AAPL'], n_steps=21):
    """
    Main entry point of the app
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker)[-500:]

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        data[ticker]['adj_close'].mean(),
                                        data[ticker]['adj_close'].median(),
                                        data[ticker]['adj_close'].max(),
                                        data[ticker]['adj_close'].min(),
                                        data[ticker]['adj_close'].var(),
                                        data[ticker]['adj_close'].std(),
                                        data[ticker]['adj_close'].skew(),
                                        data[ticker]['adj_close'].kurtosis()))

        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))

        data[ticker]['log_returns'].dropna(inplace=True)

        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            data[ticker]['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        train, test = np.arange(0, 450), np.arange(
            450, len(data[ticker]['log_returns']))  # contiguous split, no gap at 450
        n = len(train)

        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)

            nu = pm.Exponential('nu', 1. / 10)
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)

            # lam is a precision (1/variance) in pymc3, not an sd as in scipy
            r = pm.StudentT('r',
                            nu,
                            mu=mu,
                            lam=1 / pm.math.exp(-2 * logs),
                            observed=data[ticker]['log_returns'].values[train])

        with model:
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_powell)

        with model:
            # pm.Metropolis takes no start point; pass start to pm.sample instead
            step = pm.Metropolis(vars=[logs, mu, nu, sigma])
            start2 = pm.sample(100, step, start=start)[-1]

            step = pm.Metropolis(vars=[logs, mu, nu, sigma])
            trace = pm.sample(2000, step, start=start2)

        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        pred_results = pd.DataFrame(
            data=dict(original=data[ticker]['log_returns'][test],
                      prediction=pred_data[ticker][1, :]),
            index=data[ticker]['log_returns'][test].index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(data[ticker]['log_returns'].values, color='blue')
        ax.plot(1 + len(train) + np.arange(0, len(test)),
                pred_data[ticker][1, :],
                color='red')
        ax.set(
            title='{} Metropolis In-Sample Returns Prediction'.format(ticker),
            xlabel='time',
            ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Metropolis-In-Sample-Returns-Prediction.png'.format(
                ticker))

        # out-of-sample test
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace,
            len(test) + n_steps)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(
            title='{} Day {} Metropolis Out-of-Sample Returns Forecast'.format(
                n_steps, ticker),
            xlabel='time',
            ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Metropolis-Out-of-Sample-Returns-Forecast.png'.
            format(n_steps, ticker))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')

    return forecast_data
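Assuming the `fc` helper module is importable and a charts/ directory exists, the script can be driven as, e.g.:

forecasts = main(tickers=['AAPL', 'MSFT'], n_steps=21)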