Example #1
def make_covid_model(data,
                     observe,
                     col_covid="cases_growth_US",
                     process='GRW'):
    '''
    model for Free-Scale StoVol

    :param data: observation data
    :param observe: column name of y
    :param col_covid: column name of covid data
    :param process: process for the scale parameter; either 'GRW' or 'AR1'
    :return: PyMC model
    '''

    if data[col_covid].hasnans:
        raise ValueError(f"{col_covid} has NaN values")

    log_returns = data[observe].to_numpy()

    with pm.Model() as model:
        # Data
        _returns = pm.Data("_returns", log_returns)
        # _change_returns = pm.Data("_change_returns", data[observe_str], dims=observe_str, export_index_as_coords=True)
        _covid = pm.Data("covid", data[col_covid])

        # HyperPrior
        alpha = pm.Normal("alpha", mu=1, sigma=1, testval=np.random.random())
        scale = pm.GaussianRandomWalk("scale",
                                      mu=alpha * _covid,
                                      sigma=1,
                                      shape=len(data),
                                      testval=np.random.randint(
                                          low=1, high=10, size=len(data)))
        # Prior
        if process == 'GRW':  # scale follows a Gaussian Random Walk
            log_vol = pm.GaussianRandomWalk(
                "log_vol",
                sigma=scale,
                shape=len(data),
                testval=np.random.randint(low=1, high=10, size=len(data)))
        elif process == 'AR1':  # scale follows an AR1
            phi = pm.Beta("phi", alpha=20, beta=1.5)
            # phi = pm.Normal("phi", mu=1, sigma=1, testval=np.random.randint(low=1, high=10))
            log_vol = pm.AR1("log_vol",
                             k=phi,
                             tau_e=1 / pm.math.sqr(scale),
                             shape=len(data) + 1,
                             testval=np.random.randint(low=1,
                                                       high=10,
                                                       size=len(data) +
                                                       1))[:-1]
        nu = pm.Exponential("nu", 0.1)

        # Likelihood
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * log_vol),
                              observed=_returns)
    return model
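A minimal usage sketch (hypothetical; `df` stands for any DataFrame carrying the `observe` and `col_covid` columns described in the docstring):

model = make_covid_model(df, observe="log_returns",
                         col_covid="cases_growth_US", process="AR1")
with model:
    trace = pm.sample(1000, tune=1000)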
Example #2
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
from pandas_datareader import data

def main():
    print("main----------------------------------------------------1")

    # load data
    returns = data.get_data_yahoo('SPY', start='2008-5-1', end='2009-12-1')['Close'].pct_change()
    returns.plot()
    plt.ylabel('daily returns in %');
    
    with pm.Model() as sp500_model:
        print("pm.Model----------------------------------------------------1")
        
        nu = pm.Exponential('nu', 1./10, testval=5.0)
        sigma = pm.Exponential('sigma', 1./0.02, testval=0.1)
        
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))                
        r = pm.StudentT('r', nu, lam=pm.math.exp(-2*s), observed=returns)
        
    
    with sp500_model:
        print("sp500_model----------------------------------------------------1")
        trace = pm.sample(2000)

    pm.traceplot(trace, [nu, sigma]);
    plt.show()
    
    plt.figure()
    returns.plot()
    plt.plot(returns.index, np.exp(trace['s',::5].T), 'r', alpha=.03)
    plt.legend(['S&P500', 'stochastic volatility process'])
    plt.show()
Example #3
def sample_pymc(s, slot_objects_list):
    ''' Inputs:
            s                   :: int, scenario number
            slot_objects_list   :: list of Slot objects.
    Returns: 
        demands[0]              :: sample demand
        leave_samples           :: realisations for leaving glovers
        np.random.uniform(0, 1) :: probability associated with scenario s.'''
    y = [float(slot.demand) for slot in slot_objects_list]
    size = len(y)
    x = range(size)
    with pm.Model() as model:
        nu = pm.Exponential('nu', 1 / 10., testval=5.)
        sigma = pm.Exponential('sigma', 1 / 0.02, testval=.1)
        # renamed local to avoid shadowing the scenario number argument `s`
        s_rw = pm.GaussianRandomWalk('s', sd=sigma, shape=size)
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s_rw)**0.5)
        r = pm.StudentT('r', nu=nu, sd=volatility_process, observed=y)
    with model:
        trace = pm.sample(size)
    # demand_var = 1/np.exp(trace['s', ::5].T)
    sliced_trace = trace[size - 1:]
    demands = 1 / np.exp(sliced_trace.get_values('s', chains=1))
    leave_samples = [
        np.random.binomial(1, slot.leave) for slot in slot_objects_list
    ]
    # plt.scatter(x, y, label='Data')
    # # plt.plot(x, demand_var, 'C3', alpha=.03)
    # plt.scatter(x, demands, label='Sample')
    # plt.title("MCMC Sample with variance")
    # plt.legend()
    # plt.savefig('./output/demand_sample.eps', dpi=300,
    #             papertype='a4', format='eps')
    return demands[0], leave_samples, np.random.uniform(0, 1)
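A usage sketch (hypothetical; per the docstring, each `Slot` object is assumed to expose `demand` and `leave` attributes):

# slots = [...]  # list of Slot objects, one per time slot
demand, leave_samples, scenario_prob = sample_pymc(s=1, slot_objects_list=slots)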
Example #4
def bayes_multiple_detector_I(t, s, n, tracename):
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=30, sigma=5)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -30, 30)
        tau = pm.DiscreteUniform("tau" + "1", t.min(), t.max())

        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -100, 0)
            mu = T.switch(tau >= t, mu, _mu)
            if (i < (n + 1)):
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        # add random walk
        # sigma_rw = pm.Uniform("sigma_rw", 0, 10)
        g_rw = pm.GaussianRandomWalk("g_rw", tau=1, shape=len(s))
        s_obs = pm.Normal("s_obs", mu=g_rw + mu, sigma=sigma, observed=s)
    # g = pm.model_to_graphviz(abrupt_model)
    # g.view()
    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(5000, tune=1000)
        az.plot_trace(trace)
        plt.show()
        az.plot_autocorr(trace)
        plt.show()
        az.to_netcdf(trace, getpath('tracepath') + tracename)
        pm.summary(trace)
    return trace
Example #5
def make_state_model_AR1(data, observe):
    '''
    model for Two-State StoVol

    :param data: observation data
    :param observe: column name of y
    :return: PyMC model
    '''
    # Prepare data
    nstate = data['covid_state_US'].nunique()
    log_returns = data[observe].to_numpy()
    state_idx = data["covid_state_US"].to_numpy()

    with pm.Model() as model:
        # Data
        _returns = pm.Data("_returns", log_returns)
        _state_idx = pm.intX(pm.Data("state_idx", state_idx))
        # Prior
        scale = pm.InverseGamma("scale", alpha=2.5, beta=0.05, shape=nstate)
        log_vol = pm.GaussianRandomWalk('log_vol',
                                        mu=0,
                                        sigma=scale[_state_idx],
                                        shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        # Likelihood
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * log_vol),
                              observed=_returns)
    return model
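A usage sketch (hypothetical; `df` must carry the `observe` column plus an integer-coded `covid_state_US` column, as the model requires):

model = make_state_model_AR1(df, observe="log_returns")
with model:
    trace = pm.sample(1000, tune=1000)
print(trace["scale"].mean(axis=0))  # posterior mean of the per-state scale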
Example #6
def _vol_model(df: pd.DataFrame):
    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(df.index))
        vol_process = pm.Deterministic('vol_process', pm.math.exp(-2 * s))
        r = pm.StudentT('r', nu, lam=1 / vol_process, observed=df)
    with model:
        trace = pm.sample(20000)
    return trace
Example #7
def make_stochastic_volatility_model(data):
    with pm.Model() as model:
        step_size = pm.Exponential("step_size", 10)
        volatility = pm.GaussianRandomWalk("volatility",
                                           sigma=step_size,
                                           shape=len(data))
        nu = pm.Exponential("nu", 0.1)
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * volatility),
                              observed=data["change"])
    return model
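A usage sketch (assuming `data` is a DataFrame with a "change" column of daily returns, as the likelihood expects):

model = make_stochastic_volatility_model(data)
with model:
    trace = pm.sample(2000, tune=2000)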
Example #8
def exponential_model(training_data_df):
    logreturns = training_data_df['logret'].to_numpy()
    with pm.Model() as model_obj:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(logreturns))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s))
        r = pm.StudentT('r',
                        nu,
                        lam=1 / volatility_process,
                        observed=logreturns)
    return model_obj
Example #9
def make_baseline_model_RW(data, observe):
    '''
    model for Random Walk StoVol

    :param data: observation data
    :param observe: column name of y
    :return: PyMC model
    '''
    with pm.Model() as model:
        # Prior
        nu = pm.Exponential("nu", 0.1)
        scale = pm.Exponential("scale", 10)
        log_vol = pm.GaussianRandomWalk("log_vol",
                                        sigma=scale,
                                        shape=len(data))
        # Likelihood
        returns = pm.StudentT("returns",
                              nu=nu,
                              lam=np.exp(-2 * log_vol),
                              observed=data[observe])
    return model
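A usage sketch (hypothetical column names); since `lam = exp(-2 * log_vol)`, the implied volatility path is `exp(log_vol)`:

model = make_baseline_model_RW(df, observe="log_returns")
with model:
    trace = pm.sample(1000, tune=1000)
vol_path = np.exp(trace["log_vol"]).mean(axis=0)  # posterior-mean daily volatility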
Example #10
def getBayesianCone(timeSeries,
                    stepsToPredict,
                    timeSeriesSamplingDist=pm.Normal,
                    priorMean=0,
                    priorMeanDist=None,
                    priorSd=None,
                    priorSdDist=pm.HalfNormal,
                    numChains=1,
                    n_samples=200):
    df = pd.DataFrame({"y": timeSeries})
    df = df.reindex(np.arange(len(timeSeries) + stepsToPredict))
    print(df)
    # RVs must be created inside a model context
    with pm.Model():
        sd = priorSdDist("sd", priorSd)
        if priorMeanDist:
            mu = priorMeanDist("mu", mu=priorMean, sd=sd)
        else:
            mu = priorMean
        prior = pm.GaussianRandomWalk("prior", mu=mu, sd=sd, shape=len(df))
        trace = pm.sample(n_samples, chains=numChains)
    pm.traceplot(trace)
    plt.show()
    return
Example #11
def MealModel(inputs={},
              evidence={},
              start={},
              t=2,
              hpfn=None,
              name="meal_normal"):
    """
    Pharmacokinetic model for glucose-induced postprandial insulin secretion.
    Further details can be found in:
    Meal simulation model of the glucose-insulin system, C. Dalla Man,
    R.A. Rizza and C. Cobelli, IEEE Trans. Biomed., 2007, 54(10), 1740-9

    The basic equations (time discretized ODEs) were taken from this paper
    and modified:
    Y(t+1) = (1-alpha*dt)*Y(t) + alpha*beta*dt*(G(t)-Gb)
    S(t) = Y(t) + K*DGintake + Sb
    I(t+1) = (1-gamma*dt)*I(t) + dt*S(t)
    G(t+1) = (1-k2*dt)*G(t) -k1*dt*I(t) + dt*DGintake(t)

    The last equation is our own addition (although based on material from
    the paper), where the plasma glucose conc (G) is given a feedback from
    plasma insulin (I) and the derivative of glucose ingested during a meal (
    DGintake)

    :param inputs: dictionary containing possible inputs to the system

    :param evidence: dict containing evidence (specify at compile time)
    This should at least contain:
    a) derivative of ingested glucose conc. (DGintake)
    NOTE: If not given, arbitrary values will be assigned.

    :param start: dict containing starting (t=0) values of timeseries that
    depend on their own past timeslices

    :param t: # of timeslices

    :param hpfn: json file containing dictionary of hyperpriors. Will raise
    error if not supplied.

    :param name: name of model (specify at compile time, suggested:
    "meal_normal" or "meal_t2d")
    """

    # input derivative of ingested glucose conc.
    # (set a dummy prior in case this is treated as an evidence)
    DGintake_prior = pm.GaussianRandomWalk("DGintake_prior",
                                           sigma=1.0,
                                           shape=t)
    DGintake = set_input("DGintake", inputs, prior=DGintake_prior)

    # "State" CPD
    # -----------------------------------
    # NOTE: Because of the feedback loop in the model equations, G,Y,I have
    # to be solved *simultaneously* as a vector called "State", i.e.
    # State = [G, Y, I]

    # input alpha
    # (set a dummy prior in case this is treated as evidence)
    alpha_default = set_hp("alpha", name, hpfn)
    alpha_prior = pm.Normal("alpha_prior",
                            mu=alpha_default,
                            sigma=set_default_sigma(alpha_default))
    alpha = set_input("alpha", inputs, prior=alpha_prior)

    # input beta
    # (set a dummy prior in case this is treated as evidence)
    beta_default = set_hp("beta", name, hpfn)
    beta_prior = pm.Normal("beta_prior",
                           mu=beta_default,
                           sigma=set_default_sigma(beta_default))
    beta = set_input("beta", inputs, prior=beta_prior)

    # input gamma
    # (set a dummy prior in case this is treated as evidence)
    gamma_default = set_hp("gamma", name, hpfn)
    gamma_prior = pm.Normal("gamma_prior",
                            mu=gamma_default,
                            sigma=set_default_sigma(gamma_default))
    gamma = set_input("gamma", inputs, prior=gamma_prior)

    # input K
    # (set a dummy prior in case this is treated as evidence)
    K_default = set_hp("K", name, hpfn)
    K_prior = pm.Normal("K_prior",
                        mu=K_default,
                        sigma=set_default_sigma(K_default))
    K = set_input("K", inputs, prior=K_prior)

    # input Gb
    # (set a dummy prior in case this is treated as evidence)
    Gb_default = set_hp("Gb", name, hpfn)
    Gb_prior = pm.Normal("Gb_prior",
                         mu=Gb_default,
                         sigma=set_default_sigma(Gb_default))
    Gb = set_input("Gb", inputs, prior=Gb_prior)

    # input Sb
    # (set a dummy prior in case this is treated as evidence)
    Sb_default = set_hp("Sb", name, hpfn)
    Sb_prior = pm.Normal("Sb_prior",
                         mu=Sb_default,
                         sigma=set_default_sigma(Sb_default))
    Sb = set_input("Sb", inputs, prior=Sb_prior)

    # input k1
    # (set a dummy prior in case this is treated as evidence)
    k1_default = set_hp("k1", name, hpfn)
    k1_prior = pm.Normal("k1_prior",
                         mu=k1_default,
                         sigma=set_default_sigma(k1_default))
    k1 = set_input("k1", inputs, prior=k1_prior)

    # input k2
    # (set a dummy prior in case this is treated as evidence)
    k2_default = set_hp("k2", name, hpfn)
    k2_prior = pm.Normal("k2_prior",
                         mu=k2_default,
                         sigma=set_default_sigma(k2_default))
    k2 = set_input("k2", inputs, prior=k2_prior)

    # State
    # dynamic parents
    dp_State = [{
        "node": "me",
        "timeslices": 0
    }, {
        "node": DGintake,
        "timeslices": [0, 1]
    }]
    # static parents
    sp_State = [alpha, beta, gamma, K, Gb, Sb, k1, k2]
    sigma_State = set_hp("sigma_State", name,
                         hpfn)  # use spherical covariances
    State = GaussianTimeSeries("State",
                               dynamic=dp_State,
                               static=sp_State,
                               fwd_model=_f_State,
                               sigma=sigma_State,
                               t=t,
                               dim=3)

    # output G, I, S (derived from elements of state)
    # these should essentially be deterministic but give them (dummy)
    # distributions with small sigma anyway
    # -----------------------------------------------------------------
    # G
    G = pm.Normal("G", mu=State[:, 0], sigma=0.001, shape=t)

    # I
    I = pm.Normal("I", mu=State[:, 2], sigma=0.001, shape=t)

    # S
    Y = State[:, 1]
    S = pm.Normal("S", mu=_f_S(Y, DGintake, K, Sb), sigma=0.001, shape=t)
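`_f_State` is referenced but not shown. A purely illustrative transcription of the docstring's discretized ODEs (the argument layout expected by `GaussianTimeSeries` and the time step `dt` are assumptions):

def _f_State_sketch(State_prev, DGintake_t, alpha, beta, gamma,
                    K, Gb, Sb, k1, k2, dt=1.0):
    # State = [G, Y, I]; hypothetical sketch, not the author's implementation
    G, Y, I = State_prev[0], State_prev[1], State_prev[2]
    Y_next = (1 - alpha * dt) * Y + alpha * beta * dt * (G - Gb)  # Y(t+1)
    S_t = Y + K * DGintake_t + Sb                                 # S(t)
    I_next = (1 - gamma * dt) * I + dt * S_t                      # I(t+1)
    G_next = (1 - k2 * dt) * G - k1 * dt * I + dt * DGintake_t    # G(t+1)
    return [G_next, Y_next, I_next]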
Example #12
ax = pm.energyplot(trace_1)
bfmi = pm.bfmi(trace_1)
ax.set_title(f"BFMI = {bfmi:.2f}")
plt.show()
varnames2 = ['δ', 'δB', 'δC']
tmp0 = pm.df_summary(trace_1, varnames2)
print(tmp0)

# ================================================================================
Bx_.set_value(basis_funcs(xs_yearA.get_value()))
# Build the model for algorithm comparison: replace the first-order regression with a Gaussian random walk
with pm.Model() as model_3:
    # define priors
    alpha3 = pm.HalfCauchy('alpha3', 10., testval=1.15)

    beta0 = pm.GaussianRandomWalk('beta0', sd=1, shape=Num_5)
    beta1 = pm.GaussianRandomWalk('beta1', sd=1, shape=Num_5)
    beta2 = pm.GaussianRandomWalk('beta2', sd=1, shape=Num_5)

    sigmaA = pm.Uniform('sigmaA', lower=0.01, upper=3)
    δ = pm.Normal('δ', 0, sd=sigmaA)  # use this statement for δ if the model converges poorly

    sigmaB = pm.Uniform('sigmaB', lower=0.01, upper=3)
    δB = pm.Normal('δB', 0, sd=sigmaB)  # use this statement for δ if the model converges poorly

    sigmaC = pm.Uniform('sigmaC', lower=0.01, upper=3)
    δC = pm.Normal('δC', 0, sd=sigmaC)  # use this statement for δ if the model converges poorly

    #     theta1 = pm.Deterministic('theta1', a0 + (σ_a * Δ_a).cumsum())
    theta1 = pm.Deterministic('theta1', (beta0).cumsum())
    #     theta = pm.Deterministic('theta',   tt.exp(Bx_.dot(theta1)   + δ  + beta*elec_tem1[0:84]+ beta1*elec_RH1[0:84]))
Example #13
def main(tickers=['AAPL'], n_steps=21):
    """
    Main entry point of the app
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker)[-500:]

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        data[ticker]['adj_close'].mean(),
                                        data[ticker]['adj_close'].median(),
                                        data[ticker]['adj_close'].max(),
                                        data[ticker]['adj_close'].min(),
                                        data[ticker]['adj_close'].var(),
                                        data[ticker]['adj_close'].std(),
                                        data[ticker]['adj_close'].skew(),
                                        data[ticker]['adj_close'].kurtosis()))

        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))

        data[ticker]['log_returns'].dropna(inplace=True)

        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            data[ticker]['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        train, test = np.arange(0, 450), np.arange(
            451, len(data[ticker]['log_returns']))
        n = len(train)

        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)

            nu = pm.Exponential('nu', 1. / 10)
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)

            # lam uses variance in pymc3, not sd like in scipy
            r = pm.StudentT('r',
                            nu,
                            mu=mu,
                            lam=1 / np.exp(-2 * logs),
                            observed=data[ticker]['log_returns'].values[train])

        with model:
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_l_bfgs_b)

        with model:
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start,
                           gamma=.25)
            start2 = pm.sample(100, step, start=start)[-1]

            # Start next run at the last sampled position.
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start2,
                           gamma=.55)
            trace = pm.sample(2000, step, start=start2)

        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        pred_results = pd.DataFrame(
            data=dict(original=data[ticker]['log_returns'][test],
                      prediction=pred_data[ticker][1, :]),
            index=data[ticker]['log_returns'][test].index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(data[ticker]['log_returns'].values, color='blue')
        ax.plot(1 + len(train) + np.arange(0, len(test)),
                pred_data[ticker][1, :],
                color='red')
        ax.set(title='{} NUTS In-Sample Returns Prediction'.format(ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-NUTS-In-Sample-Returns-Prediction.png'.format(ticker))

        # out-of-sample test
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace,
            len(test) + n_steps)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(title='{} Day {} NUTS Out-of-Sample Returns Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-NUTS-Out-of-Sample-Returns-Forecast.png'.format(
                n_steps, ticker))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')

    return forecast_data
Example #14
def build_model(
    observed: pandas.DataFrame,
    p_generation_time: numpy.ndarray,
    p_delay: numpy.ndarray,
    test_col: str,
    buffer_days: int = 10,
    pmodel: typing.Optional[pymc3.Model] = None,
) -> pymc3.Model:
    """ Builds the Rt.live PyMC3 model.

    Model by Kevin Systrom, Thomas Vladek and Rtlive contributors.

    Parameters
    ----------
    observed : pandas.DataFrame
        date-indexed dataframe with column "new_cases" (daily positives) 
        and a column of daily tests whose name is specified by parameter [test_col]
    p_generation_time : numpy.ndarray
        numpy array that describes the generation time distribution
    p_delay : numpy.ndarray
        numpy array that describes the testing delay distribution
    test_col : str
        name of column with daily new tests (predicted or actual data)
    buffer_days : int
        number of days to prepend before the beginning of the data
    pmodel : optional, PyMC3 model
        an existing PyMC3 model object to use (not context-activated)

    Returns
    -------
    pmodel : pymc3.Model
        the (created) PyMC3 model
    """
    observed = observed.rename(columns={test_col: "daily_tests"})
    # Reindex to make sure that there are no gaps.
    # Also add (unobserved) buffer days at the beginning.
    observed = _reindex_observed(observed, buffer_days)

    # make boolean masks to filter for dates that have case data, testcount data or both
    has_cases = ~numpy.isnan(observed.new_cases).values
    has_testcounts = ~numpy.isnan(observed.daily_tests).values
    has_data = has_cases & has_testcounts
    # masks that can be used w.r.t. subsets of the dates.
    # These are used to slice tensors that are already shorter than the full length.
    has_data_wrt_cases = has_data[has_cases]
    has_data_wrt_testcounts = has_data[has_testcounts]

    coords = {
        # this is the full length of dates (without gaps) covered by the generative part of the model
        "date": observed.index.values,
        # these are subsets of dates where case/testcount data is available
        "date_with_cases": observed.index.values[has_cases],
        "date_with_testcounts": observed.index.values[has_testcounts],
        # and the dates with both case & testcount data (for the likelihood)
        "date_with_data": observed.index.values[has_data],
    }
    N_dates = len(coords["date"])
    N_with_cases = len(coords["date_with_cases"])
    N_with_testcounts = len(coords["date_with_testcounts"])
    N_with_data = len(coords["date_with_data"])
    _log.info(
        "The model describes %i days of which %i have case data and %i have testcount data. %i days have both.",
        N_dates, N_with_cases, N_with_testcounts, N_with_data)

    if not pmodel:
        pmodel = pymc3.Model(coords=coords)

    with pmodel:
        # Let log_r_t walk randomly with a fixed prior of ~0.035. Think
        # of this number as how quickly r_t can react.
        log_r_t = pymc3.GaussianRandomWalk("log_r_t",
                                           sigma=0.035,
                                           dims=["date"])
        r_t = pymc3.Deterministic("r_t",
                                  pymc3.math.exp(log_r_t),
                                  dims=["date"])

        # Save data as part of trace so we can access in inference_data
        t_generation_time = pymc3.Data("p_generation_time", p_generation_time)
        # precompute generation time interval vector to speed up tt.scan
        convolution_ready_gt = _to_convolution_ready_gt(
            p_generation_time, N_dates)
        # For a given seed population and R_t curve, we calculate the
        # implied infection curve by simulating an outbreak. While this may
        # look daunting, it's simply a way to recreate the outbreak
        # simulation math inside the model:
        # https://staff.math.su.se/hoehle/blog/2020/04/15/effectiveR0.html
        seed = pymc3.Exponential("seed", 1 / 0.02)
        y0 = tt.zeros(N_dates)
        y0 = tt.set_subtensor(y0[0], seed)
        outputs, _ = theano.scan(
            fn=lambda t, gt, y, r_t: tt.set_subtensor(y[t], tt.sum(r_t * y * gt
                                                                   )),
            sequences=[tt.arange(1, N_dates), convolution_ready_gt],
            outputs_info=y0,
            non_sequences=r_t,
            n_steps=N_dates - 1,
        )
        infections = pymc3.Deterministic("infections",
                                         outputs[-1],
                                         dims=["date"])

        t_p_delay = pymc3.Data("p_delay", p_delay)
        # Convolve infections to confirmed positive reports based on a known
        # p_delay distribution. See patients.py for details on how we calculate
        # this distribution.
        test_adjusted_positive = pymc3.Deterministic(
            "test_adjusted_positive",
            theano.tensor.signal.conv.conv2d(
                tt.reshape(infections, (1, N_dates)),
                tt.reshape(t_p_delay, (1, len(p_delay))),
                border_mode="full",
            )[0, :N_dates],
            dims=["date"])

        # Picking an exposure with a prior that exposure never goes below
        # 0.1 * max_tests. The 0.1 only affects early values of Rt when
        # testing was minimal or when data errors cause underreporting
        # of tests.
        tests = pymc3.Data("tests",
                           observed.daily_tests[has_testcounts],
                           dims=["date_with_testcounts"])
        exposure = pymc3.Deterministic("exposure",
                                       pymc3.math.clip(
                                           tests,
                                           observed.daily_tests.max() * 0.1,
                                           1e9),
                                       dims=["date_with_testcounts"])

        # Test-volume adjust reported cases based on an assumed exposure
        # Note: this is similar to the exposure parameter in a Poisson
        # regression.
        positive = pymc3.Deterministic("positive",
                                       exposure *
                                       test_adjusted_positive[has_testcounts],
                                       dims=["date_with_testcounts"])
        positive_where_data = pymc3.Deterministic(
            "positive_where_data",
            positive[has_data_wrt_testcounts],
            dims=["date_with_data"])

        observed_positive = pymc3.Data("observed_positive",
                                       observed.new_cases[has_cases],
                                       dims=["date_with_cases"])
        observed_positive_where_data = pymc3.Data(
            "observed_positive_where_data",
            observed.new_cases[has_cases][has_data_wrt_cases],
            dims=["date_with_data"])

        likelihood = pymc3.NegativeBinomial(
            "likelihood",
            mu=positive_where_data,
            alpha=pymc3.Gamma("alpha", mu=6, sigma=1),
            observed=observed_positive_where_data,
            dims=["date_with_data"])
    return pmodel
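A minimal usage sketch (hypothetical; `observed_df`, `p_generation_time` and `p_delay` are assumed to be prepared as described in the docstring):

pmodel = build_model(observed_df, p_generation_time, p_delay,
                     test_col="predicted_tests")
with pmodel:
    trace = pymc3.sample(1000, tune=1000)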
Example #15
import pymc3 as pm
import theano.tensor as tt

cutoff_idx = 1000
y_obs = np.ma.MaskedArray(Y, np.arange(N) > cutoff_idx)

print "y_obs.size = ", y_obs.size

interval = 200
with pm.Model() as OUmodel:

    sigma_mu = pm.HalfNormal('s_mu', sd=0.01)   # this must be sufficiently wide, considering that it's the sd
                                                # for a whole (y_obs.size // interval) period
    sigma = pm.HalfNormal('s', sd=0.01)

    mu = pm.GaussianRandomWalk('mu', mu=0.0, sd=sigma_mu, shape=interval)
    weights = tt.repeat(mu, y_obs.size // interval)

    offset_weights = pm.Deterministic('mu_offset', 1.0 + weights)
    y = pm.Normal('y', mu=offset_weights, sd=sigma, observed=y_obs)

    trace = pm.sample(1000, tune=1000)

    pm.traceplot(trace, varnames=['mu', 's', 's_mu'])
    plt.show()

# Generate imagined trajectories and train agent on them
print "Generating data..."
simulations = []
for _ in range(NUM_SIMULATIONS):
Example #16
import pymc3 as pm
import numpy as np
import matplotlib.pylab as plt

data = np.ma.masked_values([42] * 100 + [-1] * 100 + [42] * 100, value=-1)

with pm.Model() as model:
    s = pm.GaussianRandomWalk('s', sd=1e-2, shape=len(data))
    n = pm.Normal('n', mu=pm.math.exp(s), observed=data)

    trace = pm.sample()

plt.plot(trace['s'].T, alpha=0.1)

with pm.Model() as model:
    s = pm.GaussianRandomWalk('s', sd=1e-2, shape=len(data))
    n = pm.Poisson('n', mu=pm.math.exp(s), observed=data)
    step = pm.Slice(vars=model.vars[1])
    trace = pm.sample(step=step)

pm.traceplot(trace)
plt.plot(trace['s'].T, alpha=0.1)
Example #17
# The Ergodic Theorem for Markov Chains allows us to find the posterior distribution
# by simulating a large sample size with Monte Carlo
with pm.Model() as gs_model:
    
    # Assume returns follow a log-normal distribution, 
    # common assumption for stock returns because it can account for the skew
    # semi-informed with SD of fitted lognorm likelihood estimation
    mu = pm.Lognormal('mu', sigma=lognorm_param[0])
    
    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
    sigma = PositiveNormal('sigma', mu=np.std(y), sigma=sd_sd)
    
    # Assume prior returns follows a gaussian random walk because stock returns are nonstationary
    # so this helps models the stochastic process
    # semi-informed with SD likelihood estimation
    returns = pm.GaussianRandomWalk('returns', mu=mu, sigma=sigma, shape=len(y))
    
    # Assume shape follows a positive normal distribution centered around the prior shape
    # semi-informed with SD equal to the 0.5
    shape = PositiveNormal('shape', mu=logistic_param[1], sd=0.5)
    
    # Likelihood function of observed data follows a logistic distribution based on distribution fitting
    obs = pm.Logistic('obs', mu=returns, s=shape, observed=y)
    
    # MCMC sampling methods
    # step = pm.Metropolis() # Metropolis-Hastings is often not as accurate, but is fast
    step = pm.NUTS() # No U-turn Sampler is more accurate, but takes longer
    trace = pm.sample(10000, step=step)
    
    # Traceplot
    pm.traceplot(trace)
Example #18
n_hidden = [2, 5]
interval = 20

# Initialize random weights between each layer
init_1 = np.random.randn(X.shape[1], n_hidden[0]).astype(theano.config.floatX)
init_2 = np.random.randn(n_hidden[0], n_hidden[1]).astype(theano.config.floatX)
init_out = np.random.randn(n_hidden[1]).astype(theano.config.floatX)

with pm.Model() as neural_network:

    step_size = pm.HalfNormal('step_size', sd=np.ones(n_hidden[0]), shape=n_hidden[0])

    # Weights from input to hidden layer
    weights_1 = pm.GaussianRandomWalk('layer1', sd=step_size,
                                    shape=(interval, X.shape[1], n_hidden[0]),
                                    testval=np.tile(init_1, (interval, 1, 1)))
    weights_1_rep = tt.repeat(weights_1, ann_input.shape[0] // interval, axis=0)

    weights_2 = pm.Normal('layer2', mu=0, sd=1.,
                        shape=(1, n_hidden[0], n_hidden[1]),
                        testval=init_2)
    weights_2_rep = tt.repeat(weights_2, ann_input.shape[0], axis=0)

    weights_out = pm.Normal('layer_out', mu=0, sd=1.,
                          shape=(1, n_hidden[1]),
                          testval=init_out)
    weights_out_rep = tt.repeat(weights_out, ann_input.shape[0], axis=0)

    intercept = pm.Normal('intercept', mu=0, sd=10.)
Example #19
    def build(self):
        """ Builds and returns the Generative model. Also sets self.model """

        p_delay = get_delay_distribution()
        nonzero_days = self.observed.total.gt(0)
        len_observed = len(self.observed)
        convolution_ready_gt = self._get_convolution_ready_gt(len_observed)
        x = np.arange(len_observed)[:, None]

        coords = {
            "date": self.observed.index.values,
            "nonzero_date":
            self.observed.index.values[self.observed.total.gt(0)],
        }
        with pm.Model(coords=coords) as self.model:

            # Let log_r_t walk randomly with a fixed prior of ~0.035. Think
            # of this number as how quickly r_t can react.
            log_r_t = pm.GaussianRandomWalk("log_r_t",
                                            sigma=0.035,
                                            dims=["date"])
            r_t = pm.Deterministic("r_t", pm.math.exp(log_r_t), dims=["date"])

            # For a given seed population and R_t curve, we calculate the
            # implied infection curve by simulating an outbreak. While this may
            # look daunting, it's simply a way to recreate the outbreak
            # simulation math inside the model:
            # https://staff.math.su.se/hoehle/blog/2020/04/15/effectiveR0.html
            seed = pm.Exponential("seed", 1 / 0.02)
            y0 = tt.zeros(len_observed)
            y0 = tt.set_subtensor(y0[0], seed)
            outputs, _ = theano.scan(
                fn=lambda t, gt, y, r_t: tt.set_subtensor(
                    y[t], tt.sum(r_t * y * gt)),
                sequences=[tt.arange(1, len_observed), convolution_ready_gt],
                outputs_info=y0,
                non_sequences=r_t,
                n_steps=len_observed - 1,
            )
            infections = pm.Deterministic("infections",
                                          outputs[-1],
                                          dims=["date"])

            # Convolve infections to confirmed positive reports based on a known
            # p_delay distribution. See patients.py for details on how we calculate
            # this distribution.
            test_adjusted_positive = pm.Deterministic(
                "test_adjusted_positive",
                conv2d(
                    tt.reshape(infections, (1, len_observed)),
                    tt.reshape(p_delay, (1, len(p_delay))),
                    border_mode="full",
                )[0, :len_observed],
                dims=["date"])

            # Picking an exposure with a prior that exposure never goes below
            # 0.1 * max_tests. The 0.1 only affects early values of Rt when
            # testing was minimal or when data errors cause underreporting
            # of tests.
            tests = pm.Data("tests", self.observed.total.values, dims=["date"])
            exposure = pm.Deterministic("exposure",
                                        pm.math.clip(
                                            tests,
                                            self.observed.total.max() * 0.1,
                                            1e9),
                                        dims=["date"])

            # Test-volume adjust reported cases based on an assumed exposure
            # Note: this is similar to the exposure parameter in a Poisson
            # regression.
            positive = pm.Deterministic("positive",
                                        exposure * test_adjusted_positive,
                                        dims=["date"])

            # Save data as part of trace so we can access in inference_data
            observed_positive = pm.Data("observed_positive",
                                        self.observed.positive.values,
                                        dims=["date"])
            nonzero_observed_positive = pm.Data(
                "nonzero_observed_positive",
                self.observed.positive[nonzero_days.values].values,
                dims=["nonzero_date"])

            positive_nonzero = pm.NegativeBinomial(
                "nonzero_positive",
                mu=positive[nonzero_days.values],
                alpha=pm.Gamma("alpha", mu=6, sigma=1),
                observed=nonzero_observed_positive,
                dims=["nonzero_date"])

        return self.model
Example #20
def SPTModel(inputs={}, evidence={}, start={}, t=2, hpfn=None, name="spt"):
    """
    (SP)tio-(T)emporal model representing a simplified version of a Brownian
    Dynamics simulation in a toy model of the beta cell. Rate of secretion of
    insulin granules (ISGs) from the cell per unit time is simply a linear
    gaussian combination of the model parameters. Hence, this is a toy
    realization of what is actually a many-body particle solver. 

    The basic equations (time discretized ODEs) are:
    S(t+1) = w1*S(t) + w2*G_in(t) + w3*k + w4*Npatch + w5*Nisg + w6*Ninsulin +
             w7*Disg + w8*Rpbc

    I(t+1) = w9*I(t) + w10*S(t)

    where, w's are weights that are supplied by the user. In principle these
    should be variable and amenable to Bayesian estimation, but for now they
    are constant (i.e. "baked" into the model definition)

    Random variable (RV)s that depend on previous timeslices are represented
    using the custom GaussianTimeSeries CPD while those without such
    dependencies can simply use the usual PyMC3 RV algebra. All functions
    governing time evolution of RVs are given as private functions of the form
    _f_X() where X = RV in question.

    :param inputs: dictionary containing possible inputs to the system
    This should at least contain:
    a) input glucose conc. (G_in)
    b) actin force constant (k)
    c) number of patches on insulin secretory granules (Npatch)
    d) number of insulin secretory granules (Nisg)
    e) number of insulin molecules in a granule (Ninsulin)
    f) diffusion coefficient of granules (Disg)
    g) radius of the beta cell (Rpbc)
    NOTE: If not given, arbitrary values will be assigned.

    :param evidence: dict containing evidence (specify at compile time)

    :param start: dict containing starting (t=0) values of timeseries that
    depend on their own past timeslices

    :param t: # of timeslices

    :param hpfn: json file containing dictionary of hyperpriors. Will raise
    error if not supplied

    :param name: name of model (specify at compile time, suggested: "spt")
    """

    # S CPD
    # -----------------------------------
    # input glucose profile
    # (set a dummy prior in case this is treated as an evidence)
    G_in_prior = pm.GaussianRandomWalk("G_in_prior", sigma=1.0, shape=t)
    G_in = set_input("G_in", inputs, prior=G_in_prior)

    # input actin force constant
    # (set a dummy prior in case this is treated as evidence)
    k_default = set_hp("k", name, hpfn)
    k_prior = pm.Normal("k_prior",
                        mu=k_default,
                        sigma=set_default_sigma(k_default))
    k = set_input("k", inputs, prior=k_prior)

    # input patch density on insulin granules
    # (set a dummy prior in case this is treated as evidence)
    Npatch_default = set_hp("Npatch", name, hpfn)
    Npatch_prior = pm.Normal("Npatch_prior",
                             mu=Npatch_default,
                             sigma=set_default_sigma(Npatch_default))
    Npatch = set_input("Npatch", inputs, prior=Npatch_prior)

    # input number of insulin granules
    # (set a dummy prior in case this is treated as evidence)
    Nisg_default = set_hp("Nisg", name, hpfn)
    Nisg_prior = pm.Normal("Nisg_prior",
                           mu=Nisg_default,
                           sigma=set_default_sigma(Nisg_default))
    Nisg = set_input("Nisg", inputs, prior=Nisg_prior)

    # input insulin molecule density inside a granule
    # (set a dummy prior in case this is treated as evidence)
    Ninsulin_default = set_hp("Ninsulin", name, hpfn)
    Ninsulin_prior = pm.Normal("Ninsulin_prior",
                               mu=Ninsulin_default,
                               sigma=set_default_sigma(Ninsulin_default))
    Ninsulin = set_input("Ninsulin", inputs, prior=Ninsulin_prior)

    # input granule diffusion coefficient
    # (set a dummy prior in case this is treated as evidence)
    Disg_default = set_hp("Disg", name, hpfn)
    Disg_prior = pm.Normal("Disg_prior",
                           mu=Disg_default,
                           sigma=set_default_sigma(Disg_default))
    Disg = set_input("Disg", inputs, prior=Disg_prior)

    # input beta cell radius
    # (set a dummy prior in case this is treated as evidence)
    Rpbc_default = set_hp("Rpbc", name, hpfn)
    Rpbc_prior = pm.Normal("Rpbc_prior",
                           mu=Rpbc_default,
                           sigma=set_default_sigma(Rpbc_default))
    Rpbc = set_input("Rpbc", inputs, prior=Rpbc_prior)

    # S
    # dynamic parents
    dp_S = [{"node": "me", "timeslices": 0}, {"node": G_in, "timeslices": 0}]
    # static parents
    sp_S = [k, Npatch, Nisg, Ninsulin, Disg, Rpbc]
    sigma_S = set_hp("sigma_S", name, hpfn)
    S = GaussianTimeSeries("S",
                           dynamic=dp_S,
                           static=sp_S,
                           fwd_model=_f_S,
                           sigma=sigma_S,
                           t=t)

    # I CPD
    # -----------------------------------
    # dynamic parents (no static parents)
    dp_I = [{"node": "me", "timeslices": 0}, {"node": S, "timeslices": 0}]
    sigma_I = set_hp("sigma_I", name, hpfn)
    I = GaussianTimeSeries("I",
                           dynamic=dp_I,
                           static=[],
                           fwd_model=_f_I,
                           sigma=sigma_I,
                           t=t)
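`_f_S` and `_f_I` are not shown. A hypothetical transcription of the docstring equations (the fixed weights `w1..w10` and the argument order are assumptions):

def _f_S_sketch(S_prev, G_in_prev, k, Npatch, Nisg, Ninsulin, Disg, Rpbc,
                w=(1.0,) * 8):
    # S(t+1) = w1*S(t) + w2*G_in(t) + w3*k + w4*Npatch + w5*Nisg
    #          + w6*Ninsulin + w7*Disg + w8*Rpbc
    return (w[0] * S_prev + w[1] * G_in_prev + w[2] * k + w[3] * Npatch
            + w[4] * Nisg + w[5] * Ninsulin + w[6] * Disg + w[7] * Rpbc)

def _f_I_sketch(I_prev, S_prev, w9=1.0, w10=1.0):
    # I(t+1) = w9*I(t) + w10*S(t)
    return w9 * I_prev + w10 * S_prev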
Example #21
def NetworkModel(inputs={}, evidence={}, start={}, t=2, hpfn=None, name="net"):
    """
    Simple network model that coarse-grains the glycolytic, mitochondrial
    and GLP-1 mediated cAMP pathways to produce a feedback-less network
    connecting input cellular glucose input to insulin secretion. This model
    is a coarse-grained version of the INSULIN SECRETION KEGG pathway
    (https://www.kegg.jp/kegg-bin/highlight_pathway?scale=1.0&map=map04911&keyword=insulin)

    The basic equations (time discretized ODEs) are:
    ATP(t+1) = w1*ATP(t) + w2*PFK_activity*G_in(t)
    GLP1(t+1) = w3*GLP1(t)
    GLP1R(t) = w4*GLP1R_ext(t) + (w5/GLP1_activity) * GLP1(t)
    cAMP(t+1) = w6*cAMP(t) + w7*ATP(t) + w8*GLP1R(t)
    Ca(t+1) = w9*Ca(t) + w10*cAMP(t)
    S(t) = w11*Ca(t)
    I(t+1) = w12*I(t) + w13*S(t)

    where, w's are weights that are supplied by the user. In principle these
    should be variable and amenable to Bayesian estimation, but for now they
    are constant (i.e. "baked" into the model definition)

    Random variable (RV)s that depend on previous timeslices are represented
    using the custom GaussianTimeSeries CPD while those without such
    dependencies can simply use the usual PyMC3 RV algebra. All functions
    governing time evolution of RVs are given as private functions of the form
    _f_X() where X = RV in question.

    :param inputs: dictionary containing possible inputs to the system
    This should at least contain:
    a) input glucose conc. (G_in)
    b) GLP1R conc. (GLP1R_ext)
    c) PFK_activity
    d) GLP1_activity
    NOTE: If not given, arbitrary values will be assigned.

    :param evidence: dict containing evidence (specify at compile time)

    :param start: dict containing starting (t=0) values of timeseries that
    depend on their own past timeslices

    :param t: # of timeslices

    :param hpfn: json file containing dictionary of hyperpriors. Will raise
    error if not supplied

    :param name: name of model (specify at compile time, suggested: "net")
    """

    # ATP CPD
    # -----------------------------------
    # input glucose profile
    # (set a dummy prior in case this is treated as an evidence)
    G_in_prior = pm.GaussianRandomWalk("G_in_prior", sigma=1.0, shape=t)
    G_in = set_input("G_in", inputs, prior=G_in_prior)

    # input PFK activity
    # (set a dummy prior in case this is treated as evidence)
    PFK_activity_default = set_hp("PFK_activity", name, hpfn)
    PFK_activity_prior = pm.Normal(
        "PFK_activity_prior",
        mu=PFK_activity_default,
        sigma=set_default_sigma(PFK_activity_default))
    PFK_activity = set_input("PFK_activity", inputs, prior=PFK_activity_prior)

    # ATP
    # dynamic parents
    dp_ATP = [{"node": "me", "timeslices": 0}, {"node": G_in, "timeslices": 0}]
    # static parents
    sp_ATP = PFK_activity
    sigma_ATP = set_hp("sigma_ATP", name, hpfn)
    ATP = GaussianTimeSeries("ATP",
                             dynamic=dp_ATP,
                             static=sp_ATP,
                             fwd_model=_f_ATP,
                             sigma=sigma_ATP,
                             t=t)

    # GLP1 CPD
    # ---------------------------------------
    # dynamic parents (no static parents)
    dp_GLP1 = {"node": "me", "timeslices": 0}
    sigma_GLP1 = set_hp("sigma_GLP1", name, hpfn)
    GLP1 = GaussianTimeSeries("GLP1",
                              dynamic=dp_GLP1,
                              static=[],
                              fwd_model=_f_GLP1,
                              sigma=sigma_GLP1,
                              t=t)

    # GLP1R CPD
    # ----------------------------------------
    # input external GLP1R profile
    # (set a dummy prior in case this is treated as an evidence)
    GLP1R_ext_prior = pm.GaussianRandomWalk("GLP1R_ext_prior",
                                            sigma=1.0,
                                            shape=t)
    GLP1R_ext = set_input("GLP1R_ext", inputs, prior=GLP1R_ext_prior)

    # input GLP1 activity
    # (set a dummy prior in case this is treated as evidence)
    GLP1_activity_default = set_hp("GLP1_activity", name, hpfn)
    GLP1_activity_prior = pm.Normal(
        "GLP1_activity_prior",
        mu=GLP1_activity_default,
        sigma=set_default_sigma(GLP1_activity_default))
    GLP1_activity = set_input("GLP1_activity",
                              inputs,
                              prior=GLP1_activity_prior)

    # GLP1R
    # (use standard PyMC3 RV algebra since no dependence on past timesteps)
    sigma_GLP1R = set_hp("sigma_GLP1R", name, hpfn)
    GLP1R = pm.Normal("GLP1R",
                      mu=_f_GLP1R(GLP1R_ext, GLP1, GLP1_activity),
                      sigma=sigma_GLP1R,
                      shape=t)

    # cAMP CPD
    # -----------------------------------
    # dynamic parents (no static parents)
    dp_cAMP = [{
        "node": "me",
        "timeslices": 0
    }, {
        "node": ATP,
        "timeslices": 0
    }, {
        "node": GLP1R,
        "timeslices": 0
    }]
    sigma_cAMP = set_hp("sigma_cAMP", name, hpfn)
    cAMP = GaussianTimeSeries("cAMP",
                              dynamic=dp_cAMP,
                              static=[],
                              fwd_model=_f_cAMP,
                              sigma=sigma_cAMP,
                              t=t)

    # Ca CPD
    # -----------------------------------
    # dynamic parents (no static parents)
    dp_Ca = [{"node": "me", "timeslices": 0}, {"node": cAMP, "timeslices": 0}]
    sigma_Ca = set_hp("sigma_Ca", name, hpfn)
    Ca = GaussianTimeSeries("Ca",
                            dynamic=dp_Ca,
                            static=[],
                            fwd_model=_f_Ca,
                            sigma=sigma_Ca,
                            t=t)

    # S CPD
    # -----------------------------------
    # (use standard PyMC3 RV algebra since no dependence on past timesteps)
    sigma_S = set_hp("sigma_S", name, hpfn)
    S = pm.Normal("S", mu=_f_S(Ca), sigma=sigma_S, shape=t)

    # I CPD
    # -----------------------------------
    # dynamic parents (no static parents)
    dp_I = [{"node": "me", "timeslices": 0}, {"node": S, "timeslices": 0}]
    sigma_I = set_hp("sigma_I", name, hpfn)
    I = GaussianTimeSeries("I",
                           dynamic=dp_I,
                           static=[],
                           fwd_model=_f_I,
                           sigma=sigma_I,
                           t=t)
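`_f_GLP1R` has no dependence on past timeslices, so it maps one-to-one onto the docstring equation. A hypothetical sketch (the weights `w4`, `w5` are assumptions):

def _f_GLP1R_sketch(GLP1R_ext, GLP1, GLP1_activity, w4=1.0, w5=1.0):
    # GLP1R(t) = w4*GLP1R_ext(t) + (w5/GLP1_activity) * GLP1(t)
    return w4 * GLP1R_ext + (w5 / GLP1_activity) * GLP1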
Example #22
#StudentT Timeseries Model
#Reference1: https://docs.pymc.io/notebooks/getting_started.html#Case-study-1:-Stochastic-volatility
#Reference2: https://docs.pymc.io/notebooks/stochastic_volatility.html#Stochastic-Volatility-model
#data
returns = pd.read_csv(pm.get_data('SP500.csv'), parse_dates=True, index_col=0)
dates = returns.index.strftime("%Y/%m/%d").tolist()

#model-inference
fileName='stochastic_volatility_PyMC3'
samples=2000
tune=2000
chains=2
coords = {"date": dates}
with pm.Model(coords=coords) as model:
    step_size = pm.Exponential('step_size', 10)
    volatility = pm.GaussianRandomWalk('volatility', sigma=step_size, dims='date')
    nu = pm.Exponential('nu', 0.1)
    r = pm.StudentT('returns', nu=nu, lam=np.exp(-2 * volatility),
                    observed=returns["change"], dims='date')
    # inference
    trace = pm.sample(draws=samples, chains=chains, tune=tune)
    prior = pm.sample_prior_predictive(samples=samples)
    posterior_predictive = pm.sample_posterior_predictive(trace, samples=samples)

## STEP 1
# will also capture all the sampler statistics
data = az.from_pymc3(trace=trace, prior=prior, posterior_predictive=posterior_predictive)

## STEP 2
#dag
dag = get_dag(model)
# insert dag into sampler stat attributes
Example #23
import pymc3 as pm
import numpy as np
import tushare as ts
import matplotlib.pyplot as plt

gdp_year = ts.get_gdp_year()
gdp_year = gdp_year.set_index('year')
gdp_year = gdp_year[::-1]

gdp_year['gdp'] = gdp_year['gdp'].apply(lambda x: x/1000)

gdp_year['lag'] = gdp_year['gdp'].shift()

gdp_year.dropna(inplace=True)
with pm.Model() as model:
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    nu = pm.Exponential('nu', 1. / 10)
    beta = pm.GaussianRandomWalk('beta', sigma ** -2, shape=len(gdp_year['gdp']))
    observed = pm.Normal('observed', mu=beta * gdp_year['lag'], sd=1 / nu, observed=gdp_year['gdp'])

    trace = pm.sample(1000, tune=1000, cores=2)

plt.plot(gdp_year.index,trace['beta'].T, 'b', alpha=.03)
plt.plot(gdp_year.index, 1 + (np.log(gdp_year['gdp']) - np.log(gdp_year['lag'])), 'r', label='True Growth Rate')
plt.show()
Example #24
from pandas_datareader import data

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
returns = data.get_data_yahoo('SPY', start='2008-5-1',
                              end='2009-12-1')['Close'].pct_change()
len(returns)

returns.plot(figsize=(10, 6))
plt.ylabel('daily returns in %')

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1. / 10, testval=5.)
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)

    s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s))

    r = pm.StudentT('r', nu, lam=1 / volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(2000)

pm.traceplot(trace, [nu, sigma])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'r', alpha=0.03)
ax.set(title='volatility_process', xlabel='time', ylabel='volatility')
ax.legend(['S&P500', 'stochastic volatility_process'])
Example #25
returns = pd.read_csv(
    pm.get_data("SP500.csv"), parse_dates=True, index_col=0, usecols=["Date", "change"]
).query("Date < '2009-12-31'")
returns

# %%
returns.plot(figsize=(10, 6))
plt.ylabel("daily returns in %")

# %%
with pm.Model() as sp500_model:
    nu = pm.Exponential("nu", 1 / 10.0, testval=5.0)
    sigma = pm.Exponential("sigma", 1 / 0.02, testval=0.1)

    s = pm.GaussianRandomWalk("s", sigma=sigma, shape=len(returns))
    volatility_process = pm.Deterministic(
        "volatility_process", pm.math.exp(-2 * s) ** 0.5
    )

    r = pm.StudentT("r", nu=nu, sigma=volatility_process, observed=returns["change"])

# %%
pm.model_to_graphviz(sp500_model)

# %%
with sp500_model:
    trace = pm.sample(2000)

# %%
pm.traceplot(trace)
Example #26
estimated_corrs = np.repeat(np.mean(trace['corr'], axis=0), 10)

plt.plot(estimated_corrs, color='r')
plt.plot(corrs, color='b')
plt.show()

# Clearly in V2 the correlation is not stable. Let's model it as a random walk and examine the stability of its std. dev.
with pm.Model() as model3:

    def custom_likelihood(x_diffs, y_obs_last, y_obs):
        expected = y_obs_last - corr * x_diffs
        return pm.Normal.dist(mu=expected, sd=0.1).logp(y_obs)

    step_size = pm.Uniform('step_size', lower=0.0001, upper=0.1, shape=999)
    corr = pm.GaussianRandomWalk('corr', mu=0, sd=step_size, shape=1000)
    corr = tt.repeat(corr, 10)

    pm.DensityDist('obs',
                   custom_likelihood,
                   observed={
                       'x_diffs': (x2[:-1] - x2[1:]),
                       'y_obs_last': y2[:-1],
                       'y_obs': y2[1:]
                   })

    mean_field = pm.fit(n=5000,
                        method='advi',
                        obj_optimizer=pm.adam(learning_rate=0.01))
    trace = mean_field.sample(1000)
Example #27
def model_evolving(mcmc_in, brand_level=True, asymmetric_accuracy=True, draws=500, tune=500):
    interval = 20
    tt_id = mcmc_in.t_id//interval
    # a = np.array([[2,1],[2,1]])
    j_len = (pd.Series(mcmc_in.jj).value_counts()//interval)+1
    j_len_max = max(j_len)

    model = pm.Model()

    with model:
        # the true labels
        rho_prior = np.ones((1,2))
        rho = pm.Dirichlet('rho', a=rho_prior, shape=(mcmc_in.K,2))
        z = pm.Categorical('z', 
                            p=rho[mcmc_in.kk_lkup], 
                            observed=np.ma.masked_values(mcmc_in.z_obs, value=-999),
                            testval=mcmc_in.z_init,
                            shape=mcmc_in.N)

        # credibilities
        if not asymmetric_accuracy:
            volatility = pm.HalfNormal('volatility', sd=0.75, shape=mcmc_in.J, testval=0.75*T.ones(mcmc_in.J))
            # One random walk per j, right-padded with ones so all walks stack
            # into a single (J, j_len_max) tensor.
            alpha_walk = [T.concatenate([
                    pm.GaussianRandomWalk('alpha_walk{0}'.format(j),
                    sd=volatility[j],
                    shape=j_len[j],
                    init=pm.Normal.dist(0.5, 1),
                    testval=T.ones(j_len[j])),
                        T.ones(j_len_max-j_len[j])]) for j in range(mcmc_in.J)]
            alpha_walk = T.as_tensor_variable(alpha_walk)
            if brand_level:
                beta_prime = pm.Normal('beta_prime', mu=0, sd=0.4, shape=mcmc_in.K)
                def logp(r, z=z, alpha_walk=alpha_walk):
                    out = T.switch(T.eq(z[mcmc_in.ii],r),
                                  -1*T.log(1+T.exp(-1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk]))),
                                  -1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk]) - 1*T.log(1+T.exp(-1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk])))
                                  )
                    return T.sum(out)
            else:
                beta_prime = pm.Normal('beta_prime', mu=0, sd=0.4, shape=mcmc_in.N)
                def logp(r, z=z, alpha_walk=alpha_walk):
                    out = T.switch(T.eq(z[mcmc_in.ii],r),
                                  -1*T.log(1+T.exp(-1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii]))),
                                  -1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii]) - 1*T.log(1+T.exp(-1*alpha_walk[mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii])))
                                  )
                    return T.sum(out)

        else:  # separate accuracy walks for true labels 0 and 1
            volatility0 = pm.HalfNormal('volatility0', sd=0.75, shape=mcmc_in.J, testval=0.75*T.ones(mcmc_in.J))
            alpha_walk0 = [T.concatenate([
                    pm.GaussianRandomWalk('alpha_walk0{0}'.format(j),  
                    sd=volatility0[j],
                    shape=j_len[j], 
                    init=pm.Normal.dist(0.5,1),
                    testval=T.ones(j_len[j])),
                        T.ones(j_len_max-j_len[j])]) for j in range(mcmc_in.J)]
            alpha_walk0 = T.as_tensor_variable(alpha_walk0) 
            
            volatility1 = pm.HalfNormal('volatility1', sd=0.75, shape=mcmc_in.J, testval=0.75*T.ones(mcmc_in.J))
            alpha_walk1 = [T.concatenate([
                    pm.GaussianRandomWalk('alpha_walk1{0}'.format(j),  
                    sd=volatility1[j],
                    shape=j_len[j], 
                    init=pm.Normal.dist(0.5,1),
                    testval=T.ones(j_len[j])),
                        T.ones(j_len_max-j_len[j])]) for j in range(mcmc_in.J)]
            alpha_walk1 = T.as_tensor_variable(alpha_walk1) 
            
            alpha_walk = T.as_tensor_variable([alpha_walk0,alpha_walk1])
            
            if brand_level:
                beta_prime = pm.Normal('beta_prime', mu=0, sd=0.4, shape=mcmc_in.K)
                def logp(r, z=z, alpha_walk=alpha_walk):
                    out = T.switch(T.eq(z[mcmc_in.ii],r),
                                  -1*T.log(1+T.exp(-1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk]))),
                                  -1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk]) - 1*T.log(1+T.exp(-1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.kk])))
                                  )
                    return T.sum(out)
            else:
                beta_prime = pm.Normal('beta_prime', mu=0, sd=0.4, shape=mcmc_in.N)
                def logp(r, z=z, alpha_walk=alpha_walk):
                    out = T.switch(T.eq(z[mcmc_in.ii],r),
                                  -1*T.log(1+T.exp(-1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii]))),
                                  -1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii]) - 1*T.log(1+T.exp(-1*alpha_walk[z[mcmc_in.ii],mcmc_in.jj,tt_id]*T.exp(beta_prime[mcmc_in.ii])))
                                  )
                    return T.sum(out)
        r = pm.DensityDist('r', logp, observed=mcmc_in.r_obs)    

    with model:
        trace = pm.sample(draws=draws, tune=tune, chains=1)  
    
    return trace
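# Hypothetical call sketch (not in the original): mcmc_in is assumed to be a
# simple namespace carrying the fields the function reads (t_id, jj, ii, kk,
# kk_lkup, z_obs, z_init, r_obs, N, J, K); all values below are made up, and
# the pd/pm/T imports from the snippet's context are assumed.
from types import SimpleNamespace
import numpy as np

N, J, K = 200, 4, 3
jj = np.repeat(np.arange(J), N // J)     # group index of each rating
t_id = np.tile(np.arange(N // J), J)     # within-group time index
mcmc_in = SimpleNamespace(
    N=N, J=J, K=K, t_id=t_id, jj=jj,
    ii=np.random.randint(0, N, size=N),
    kk=np.random.randint(0, K, size=N),
    kk_lkup=np.random.randint(0, K, size=N),
    z_obs=np.full(N, -999),              # -999 marks unobserved true labels
    z_init=np.random.randint(0, 2, size=N),
    r_obs=np.random.randint(0, 2, size=N),
)
trace = model_evolving(mcmc_in, draws=100, tune=100)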
Ejemplo n.º 28
0
import numpy as np
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt

data = np.genfromtxt("../../SourceData/Temperatures/LR04-stack.csv",
                     delimiter=",")
print(data[:, 1])
# columns: Age, Delta18O, Sigma; model the Delta18O series indexed by Age
returns = pd.Series(data[:, 1], index=data[:, 0])
print(returns)

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1 / 10., testval=5.)
    sigma = pm.Exponential('sigma', 1 / 0.02, testval=.1)
    s = pm.GaussianRandomWalk('s', sd=sigma, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s)**0.5)
    r = pm.StudentT('r', nu=nu, sd=volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(200)

pm.traceplot(trace, varnames=['nu', 'sigma'])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'C3', alpha=.03)
ax.set(title='volatility_process', xlabel='age', ylabel='volatility')
ax.legend(['LR04 stack', 'stochastic volatility process'])
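# Follow-up sketch (not in the original): summarize the fitted volatility path
# by its posterior mean; the StudentT scale in this model is exp(-s).
vol_mean = np.exp(-trace['s']).mean(axis=0)

fig, ax = plt.subplots(figsize=(15, 4))
ax.plot(returns.index, vol_mean, 'C3')
ax.set(xlabel='age', ylabel='posterior mean volatility')
plt.show()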
Ejemplo n.º 29
0
    def _build_model(self, X, Y):

        # Truncate the number of data points down to a multiple of the interval.
        actual_N = (X.shape[0] // self.interval) * self.interval
        self.ann_input = theano.shared(X[:actual_N])
        self.ann_output = theano.shared(Y[:actual_N])

        layer_inits = []
        for layer in range(len(self.nodes_per_layer)):
            if layer == 0:
                n_in = X.shape[1]
            else:
                n_in = self.nodes_per_layer[layer - 1]

            layer_inits.append(
                np.random.randn(n_in, self.nodes_per_layer[layer]).astype(
                    theano.config.floatX))

        init_out = np.random.randn(self.nodes_per_layer[-1]).astype(
            theano.config.floatX)

        with pm.Model() as self.model:
            self.weights = []

            step_size = pm.HalfNormal('step_size',
                                      sd=np.ones(self.nodes_per_layer[0]) *
                                      self.weight_sd,
                                      shape=self.nodes_per_layer[0])

            for layer in range(len(self.nodes_per_layer)):
                # TODO: need to add biases?
                if layer == 0:  # only the first layer's weights follow Gaussian random walks
                    weights_intervals = pm.GaussianRandomWalk(
                        'w%s' % layer,
                        sd=step_size,
                        shape=(self.interval, X.shape[1],
                               self.nodes_per_layer[layer]),
                        testval=np.tile(layer_inits[layer],
                                        (self.interval, 1, 1)))

                    weights = tt.repeat(
                        weights_intervals,
                        (self.ann_input.shape[0] // self.interval),
                        axis=0)
                else:
                    weights_intervals = pm.Normal(
                        'w%s' % layer,
                        mu=0,
                        sd=self.weight_sd,
                        shape=(1, self.nodes_per_layer[layer - 1],
                               self.nodes_per_layer[layer]),
                        testval=layer_inits[layer])

                    weights = tt.repeat(weights_intervals, actual_N, axis=0)

                self.weights.append(weights)

            # TODO: support multidimensional Y output
            weights_out = pm.Normal('w_out',
                                    mu=0,
                                    sd=self.weight_sd,
                                    shape=(1, self.nodes_per_layer[-1]),
                                    testval=init_out)

            weights_out_rep = tt.repeat(weights_out, actual_N, axis=0)

            # Now assemble the neural network
            self.layers = []
            for layer in range(len(self.nodes_per_layer)):
                layer_input = self.ann_input
                if layer > 0:
                    layer_input = self.layers[layer - 1]

                batched_dot_product = tt.batched_dot(layer_input,
                                                     self.weights[layer])
                # TODO: add biases here (+ self.biases[layer])
                self.layers.append(self.activation_fn(batched_dot_product))

            if self.output == 'normal':
                layer_out = tt.batched_dot(self.layers[-1], weights_out_rep)
                bias_out = pm.Normal('bias_out', mu=0.0, sd=self.bias_sd)

                # Regression -> Gaussian likelihood
                pm.Normal('y',
                          mu=layer_out + bias_out,
                          sd=0.1,
                          observed=self.ann_output)
            elif self.output == 'bernoulli':
                layer_out = tt.nnet.sigmoid(
                    tt.batched_dot(self.layers[-1], weights_out_rep))

                # Binary classification -> Bernoulli likelihood
                pm.Bernoulli('y', layer_out, observed=self.ann_output)
            else:
                raise ValueError(
                    "Unknown output parameter value: %s. Choose among 'normal', 'bernoulli'."
                    % self.output)
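# Hypothetical driver sketch (not in the original): the wrapper class name,
# its constructor arguments, and the ADVI fit below are all assumptions; the
# snippet only shows _build_model.
import numpy as np
import pymc3 as pm
import theano.tensor as tt

net = RandomWalkBNN(nodes_per_layer=[8], interval=10, weight_sd=0.5,
                    bias_sd=0.5, activation_fn=tt.tanh, output='bernoulli')

X = np.random.randn(500, 3).astype('float32')
Y = (X[:, 0] > 0).astype('float32')

net._build_model(X, Y)
with net.model:
    approx = pm.fit(n=10000, method='advi')  # variational fit; NUTS also possible
    trace = approx.sample(500)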