コード例 #1
0
    def random_regression(self):
        """Fit a random-walk regression of GLD on SLV and plot the result.

        The intercept ``alpha`` and the slope ``beta`` each follow a Gaussian
        random walk.  One coefficient is shared by ``self.subsample_alpha``
        (resp. ``self.subsample_beta``) consecutive observations and is
        expanded back to one value per observation with ``np.repeat``.

        Reads ``self.item`` (only its first 3180 rows; the code uses its
        ``index``, ``SLV`` and ``GLD``), ``self.subsample_alpha`` and
        ``self.subsample_beta``.  Draws two matplotlib figures and returns
        ``None``.

        NOTE(review): the repeated coefficients only line up with the data
        when the number of rows is a multiple of both subsample sizes --
        confirm against the caller.
        """
        model_randomwalk = pm.Model()
        item = self.item[:3180]
        mpl_dates = date2num(item.index)
        with model_randomwalk:
            # Standard deviations of the two random walks.  The first one was
            # previously registered under the misspelled name 'sigam_alpha'.
            sigma_alpha = pm.Exponential('sigma_alpha', 1.0 / 0.02, testval=0.1)
            sigma_beta = pm.Exponential('sigma_beta', 1.0 / 0.02, testval=0.1)

            alpha = GaussianRandomWalk('alpha', sigma_alpha ** -2, shape=int(len(item) / self.subsample_alpha))
            beta = GaussianRandomWalk('beta', sigma_beta ** -2, shape=int(len(item) / self.subsample_beta))
            # Stretch the coarse coefficients to one value per observation.
            alpha_r = np.repeat(alpha, self.subsample_alpha)
            beta_r = np.repeat(beta, self.subsample_beta)

            regression = alpha_r + beta_r * item.SLV.values
            sd = pm.Uniform(name='sd', lower=0, upper=20)
            likelihood = pm.Normal(name='GLD', mu=regression, sd=sd,
                           observed=item.GLD.values)

            # Optimise the random walks first and start NUTS from there.
            start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)
            step = pm.NUTS(scaling=start)
            trace_rw = pm.sample(500, step, start=start, progressbar=False, tune=2000)

        alpha_samples = trace_rw['alpha']
        beta_samples = trace_rw['beta']
        alpha_mean = np.mean(alpha_samples, axis=0)
        beta_mean = np.mean(beta_samples, axis=0)

        # One x position per random-walk segment.  The segment counts come
        # from the trace instead of a hard-coded 53, so the plots stay valid
        # for other data lengths or subsample sizes.
        first_date, last_date = min(mpl_dates), max(mpl_dates)
        alpha_dates = np.linspace(first_date, last_date, len(alpha_mean))
        beta_dates = np.linspace(first_date, last_date, len(beta_mean))

        fig, ax1 = plt.subplots(figsize=(10, 5))
        plt.plot(alpha_dates, alpha_mean, 'b', lw=2.5, label='alpha')
        # A handful of individual draws (samples 10..54) around the mean.
        for path in alpha_samples[10:55]:
            plt.plot(alpha_dates, path, 'b-.', lw=0.75)
        plt.xlabel('date')
        plt.ylabel('alpha')
        plt.axis('tight')
        plt.grid(True)
        plt.legend(loc=2)
        ax1.xaxis.set_major_formatter(DateFormatter('%d %b %y'))
        ax2 = ax1.twinx()
        plt.plot(beta_dates, beta_mean, 'r', lw=2.5, label='beta')
        for path in beta_samples[10:55]:
            plt.plot(beta_dates, path, 'r-.', lw=0.75)
        plt.ylabel('beta')
        plt.legend(loc=4)
        fig.autofmt_xdate()

        plt.figure(figsize=(10, 5))
        plt.scatter(item['SLV'], item['GLD'], c=mpl_dates, marker='o')
        plt.colorbar(ticks=DayLocator(interval=250), format=DateFormatter('%d %b %y'))
        plt.grid(True)
        plt.xlabel('SLV')
        plt.ylabel('GLD')
        x = np.linspace(min(item['SLV']), max(item['SLV']))
        n_lines = min(len(alpha_mean), len(beta_mean))
        for i in range(n_lines):
            # Integer index into the 256-entry colormap.  With true division
            # the argument becomes a float > 1, which matplotlib clips to the
            # last colour for every line but the first.
            colour = plt.cm.jet(256 * i // n_lines)
            plt.plot(x, alpha_mean[i] + beta_mean[i] * x, color=colour)
コード例 #2
0
def configure_sample_stoch_vol_model(log_returns, samples):
    """Fit a stochastic volatility model with PyMC3 and plot the result.

    The model is configured inside a ``with`` context and sampled with
    ``pm.sample`` (the original note says this uses the No-U-Turn Sampler).
    Two figures are shown: the sampled log-volatility paths, and the absolute
    returns overlaid with the exponentiated volatility paths.

    Parameters
    ----------
    log_returns : array-like
        Series of logarithmic returns; its length sets the length of the
        latent random walk.
    samples : int
        Number of posterior draws requested from ``pm.sample``.

    Returns
    -------
    None
    """
    print("Configuring stochastic volatility with PyMC3...")
    model = pm.Model()
    with model:
        sigma = pm.Exponential('sigma', 50.0, testval=0.1)
        nu = pm.Exponential('nu', 0.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(log_returns))
        logrets = pm.StudentT('logrets', nu, lam=pm.math.exp(-2.0*s), observed=log_returns)

    print("Fitting the stochastic volatility model...")
    with model:
        trace = pm.sample(samples)

    # Thin the draws (every k-th sample) so the overlaid paths stay readable.
    k = 10
    opacity = 0.03
    log_vol_paths = trace[s][::k].T

    print("Plotting the log vol")
    plt.plot(log_vol_paths, 'b', alpha=opacity)
    plt.xlabel("Time")
    plt.ylabel("Log Vol")
    plt.show()

    print("Plotting the absolute returns overlaid with vol...")
    # Absolute simple returns |exp(r) - 1|.  The subtraction has to happen
    # inside abs(): exp(r) is always positive, so abs(exp(r)) - 1 would plot
    # signed returns instead.
    plt.plot(np.abs(np.exp(log_returns) - 1.0), linewidth=0.5)
    plt.plot(np.exp(log_vol_paths), 'r', alpha=opacity)
    plt.xlabel("Trading Days")
    plt.ylabel("Absolute Returns/Volatility")
    plt.show()
コード例 #3
0
def stochastic_vol_model(returns):
    """Sample a stochastic volatility model and return exp of the latent walk.

    The latent process ``s`` is a Gaussian random walk with one step per
    entry of ``returns``; the observations are Student-T distributed with
    precision ``exp(-2 * s)``.  The result is ``exp`` applied to the
    transposed samples of ``s``.
    """
    n_steps = len(returns)
    with pm.Model() as model:
        step_size = pm.Exponential('sigma', 50.)
        s = GaussianRandomWalk('s', sd=step_size, shape=n_steps)
        nu = pm.Exponential('nu', .1)
        pm.StudentT('r', nu=nu, lam=pm.math.exp(-2*s), observed=returns)
        # Longer tuning and a higher acceptance target for the NUTS sampler.
        nuts_options = {'target_accept': .9}
        trace = pm.sample(tune=2000, nuts_kwargs=nuts_options)
    walk_samples = trace[s]
    return exp(walk_samples.T)
コード例 #4
0
ファイル: bayesian.py プロジェクト: zilongli/pyfolio
def model_stoch_vol(data, samples=2000):
    """
    Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (width of the
    T-distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s))
        # Qualified with ``pm.`` like every other distribution here; a bare
        # ``StudentT`` is not imported anywhere in this function.
        pm.StudentT('r', nu, lam=volatility_process, observed=data)
        # Optimise only the latent walk to obtain a scaling for NUTS.
        start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)

        # Short warm-up run scaled by the MAP estimate.
        step = pm.NUTS(scaling=start)
        trace = pm.sample(100, step, progressbar=False)

        # Start next run at the last sampled position.
        step = pm.NUTS(scaling=trace[-1], gamma=.25)
        trace = pm.sample(samples, step, start=trace[-1], progressbar=False)

    return model, trace
コード例 #5
0
def model_stoch_vol(data, samples=2000):
    """
    Run stochastic volatility model.

    This model estimates the volatility of a returns series over time.
    Returns are assumed to be T-distributed. lambda (width of the
    T-distribution) is assumed to follow a random walk.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        nu = pm.Exponential('nu', 1. / 10, testval=5.)
        sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
        volatility_process = pm.Deterministic('volatility_process',
                                              pm.math.exp(-2 * s))
        # Qualified with ``pm.`` like every other distribution here; a bare
        # ``StudentT`` is not imported anywhere in this function.
        pm.StudentT('r', nu, lam=volatility_process, observed=data)

        # Let pm.sample choose the step method and initialisation.
        trace = pm.sample(samples)

    return model, trace
コード例 #6
0
plt.ylabel('daily returns in %')

# Define the model.
# \sig ~ Exponential(rate = 1/0.02 = 50)
#       why? the stdev of returns is approx 0.02, and an exponential with
#       rate 50 has mean = stdev = 1/50 = 0.02
# \nu ~ Exponential(rate = 1/10 = 0.1)
#       degrees of freedom of the Student-T likelihood
#       mean of an exponential with rate 0.1 is 10
# s_i ~ normal(s_i-1, \sig^-2)
#       (second argument is sigma**-2; presumably the precision -- confirm
#       against the installed PyMC3 version)
# volatility_process_i = exp(-2 s_i)
# y_i ~ studentT(\nu, lam = 1 / volatility_process_i = exp(2 s_i))
with Model() as sp500_model:
    nu = Exponential('nu', 1. / 10,
                     testval=5.)  # original note: a rate of 50 gave similar results
    sigma = Exponential('sigma', 1. / .02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2 * s))
    r = StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# Fit the model. No step method is passed, so sample() picks one itself
# (the original note says NUTS is auto-assigned).
# You may get a warning like:
#   WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456')
# The original author's advice is to ignore it; sampling continues.
with sp500_model:
    trace = sample(2000, progressbar=False)
# Plot the traces of nu and sigma, skipping the first 200 draws
# (presumably treated as burn-in -- the original author was unsure too).
traceplot(trace[200:], [nu, sigma])

# plot the results: volatility inferred by the model
コード例 #7
0
ファイル: GHME_2013.py プロジェクト: Fadh1/Virtual-Tutor
    x = np.array(x)
    group = np.array(group)

    idx = np.searchsorted(x0, x)
    dl = np.array(x - x0[idx - 1])
    dr = np.array(x0[idx] - x)
    d = dl + dr
    wl = dr / d

    return wl * y0[idx - 1, group] + (1 - wl) * y0[idx, group]


# Model: knot values follow a Gaussian random walk, are interpolated at the
# observed ages, and the observed rates are normal around the interpolation.
with Model() as model:
    # Step scale of the random walk over knot values.
    coeff_sd = HalfCauchy('coeff_sd', 5)

    # Latent values on a (nknots, ncountries) grid.
    y = GaussianRandomWalk('y', sigma=coeff_sd, shape=(nknots, ncountries))

    # Interpolate the latent grid at each observation's age and group.
    p = interpolate(knots, y, age, group)

    # Observation noise scale.
    sd = HalfCauchy('sd', 5)

    # Likelihood of the observed rates.
    vals = Normal('vals', p, sigma=sd, observed=rate)


def run(n=3000):
    if n == "short":
        n = 150
    with model:
        trace = sample(n, tune=int(n / 2), init='advi+adapt_diag')

    for i, country in enumerate(countries):
コード例 #8
0
# Plot the raw observations and save the figure to disk.
plt.plot(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Observed Data")
plt.savefig('Observed_Data.png')


# Scale used to make the priors on mu and tau very wide.
LARGE_NUMBER = 1e5

model = pm.Model()
with model:
    # Shared variable so the smoothing level can be changed with set_value()
    # without rebuilding the model.
    smoothing_param = shared(0.9)
    mu = pm.Normal("mu", sd=LARGE_NUMBER)
    tau = pm.Exponential("tau", 1.0/LARGE_NUMBER)
    # Latent curve: random walk whose precision is tau / (1 - smoothing_param).
    z = GaussianRandomWalk("z",
                           mu=mu,
                           tau=tau / (1.0 - smoothing_param),
                           shape=y.shape)
    # Observations: normal around z with precision tau / smoothing_param, so a
    # larger smoothing_param gives the observation noise more of the variance.
    obs = pm.Normal("obs",
                    mu=z,
                    tau=tau / smoothing_param,
                    observed=y)

def infer_z(smoothing):
    """Return the MAP estimate of ``z`` for the given smoothing level.

    Stores ``smoothing`` in the shared ``smoothing_param`` and then optimises
    ``z`` with L-BFGS-B inside the module-level ``model``.
    """
    smoothing_param.set_value(smoothing)
    with model:
        map_point = pm.find_MAP(vars=[z], fmin=optimize.fmin_l_bfgs_b)
    return map_point['z']


# allocate 98% of the variance to the noise (assuming this value is used as
# the smoothing parameter of the model -- confirm at the call site)
smoothing = 0.98
コード例 #9
0
from pymc3.distributions.timeseries import GaussianRandomWalk
from scipy import optimize

import pandas as pd
# NOTE(review): n is not referenced anywhere in the visible script.
n = 400
# Daily S&P 500 changes, indexed by date.
returns = pd.read_csv(pm.get_data("SP500.csv"), index_col='date')['change']
# Bare expression: shows a preview in a notebook, has no effect in a script.
returns[:5]

# Plot the raw return series.
fig, ax = plt.subplots(figsize=(14, 8))
returns.plot(label='S&P500')
ax.set(xlabel='time', ylabel='returns')
ax.legend()

with pm.Model() as model:
    # Step scale of the latent random walk (registered as 'sigma').
    step_size = pm.Exponential('sigma', 50.)
    # Latent process, one value per observation.
    s = GaussianRandomWalk('s', sigma=step_size, shape=len(returns))

    # Degrees of freedom of the Student-T likelihood.
    nu = pm.Exponential('nu', .1)

    # Likelihood with precision exp(-2 * s).
    r = pm.StudentT('r', nu=nu, lam=pm.math.exp(-2 * s), observed=returns)

with model:
    # Longer tuning phase and a higher acceptance target than the defaults.
    trace = pm.sample(tune=2000, target_accept=0.9)

pm.traceplot(trace, var_names=['sigma', 'nu'])

fig, ax = plt.subplots()

# Overlay every sampled path of s with low opacity.
plt.plot(trace['s'].T, 'b', alpha=.03)
ax.set(title=str(s), xlabel='time', ylabel='log volatility')
コード例 #10
0
def main(group, anType, sessionNum):
    """Fit the random-walk learning model for one group and save summaries.

    Reads the per-day trial counts (``...Denom.csv``) and correct counts
    (``...Num.csv``) for the requested group, analysis type and session
    count, samples the model in two NUTS stages, prints WAIC and DIC, and
    writes two text files: the learning-trial summary and the 5/50/95
    percentiles of the population curve ``p``.

    Parameters
    ----------
    group : str
        Group label used in input and output file names.
    anType : str
        One of ``'overall'``, ``'inbound'`` or ``'outbound'``.
    sessionNum : int or str
        Session count used in the ``<sessionNum>Session`` folder name.

    Returns
    -------
    The samples stored in the trace under ``'p'``.

    Raises
    ------
    ValueError
        If ``anType`` is not one of the three supported analysis types.

    Notes
    -----
    Depends on the module-level names ``sigmaMin``, ``sigmaMax``,
    ``sigmabMin``, ``sigmabMax`` and ``tinvlogit``.
    """
    # Fail before the expensive sampling instead of hitting an unbound
    # ``dtype`` after it.
    dtype_by_analysis = {
        'overall': 'Overall',
        'inbound': 'Inbound',
        'outbound': 'Outbound',
    }
    if anType not in dtype_by_analysis:
        raise ValueError(
            "anType must be 'overall', 'inbound' or 'outbound', got "
            + repr(anType))
    dtype = dtype_by_analysis[anType]

    # Only consumed by the plot_results calls that are currently disabled.
    fig_no = 1 if group == 'Young' else 2

    data_dir = '/Users/adelekap/Documents/WMaze_Analysis/Paper/data/'
    data_denom = pd.read_csv('{0}{1}Session/{2}{3}Denom.csv'.format(
        data_dir, str(sessionNum), anType,
        group))  # csv of total trials for each day
    data_numAll = pd.read_csv('{0}{1}Session/{2}{3}Num.csv'.format(
        data_dir, str(sessionNum), anType, group))  # correct per day

    numAnimals = len(data_numAll)

    with pm.Model() as model_old:
        sigma = pm.Uniform('sigma', float(sigmaMin), float(sigmaMax))
        sigmab = pm.Uniform('sigmab', float(sigmabMin), float(sigmabMax))

        # Population-level offset and one offset per animal.
        betaPop0 = pm.Normal('betaPop0', mu=0, sd=100)
        beta_0 = pm.Normal('beta_0',
                           mu=betaPop0,
                           sd=sigmab,
                           shape=len(data_numAll))

        # Shared latent learning state, one value per data column.
        x = GaussianRandomWalk('x',
                               sd=sigma,
                               init=pm.Normal.dist(mu=0.0, sd=0.01),
                               shape=data_numAll.shape[1])
        pm.Deterministic('p', tinvlogit(x + betaPop0))

        # One binomial likelihood per animal (row of the data frames).
        for rat in range(numAnimals):
            stp = 'p{0}'.format(rat)
            stn = 'n{0}'.format(rat)
            pn = pm.Deterministic(stp, tinvlogit(x + beta_0[rat]))
            pm.Binomial(stn,
                        p=pn,
                        n=np.asarray(data_denom[rat:(rat + 1)]),
                        observed=np.asarray(data_numAll[rat:(rat + 1)]))

    with model_old:
        step1 = pm.NUTS(vars=[x, sigmab, beta_0], gamma=.25)
        start2 = pm.sample(2000, step1)[-1]

        # Start next run at the last sampled position.
        step2 = pm.NUTS(vars=[x, sigmab, beta_0], scaling=start2, gamma=.55)
        trace1 = pm.sample(5000, step2, start=start2, progressbar=True)

    print('---------')
    (waic_val, waic_se, waic_p) = pm.stats.waic(model=model_old,
                                                trace=trace1,
                                                n_eff=True)
    dic_val = pm.stats.dic(model=model_old, trace=trace1)
    print('WAIC ', waic_val, '  DIC ', dic_val)
    print('---------')
    # plt.figure(50)
    # pm.traceplot(trace1, varnames=['sigmab', 'beta_0', 'sigma'])
    # plt.savefig('trace' + group + '.pdf')

    # NOTE: lt1 stays empty while the plot_results call below is disabled.
    lt1 = {}
    for ii in range(len(data_numAll)):
        lc = 'p' + str(ii)
        summary_dataset = np.percentile(trace1[lc][:], [5, 50, 95], axis=0)
        # lt1[ii] = plot_results(np.asarray(summary_dataset), fig_no, ii + 1, group)

    dataDir = '/Users/adelekap/Documents/WMaze_Analysis/StochasticVolatility/BySession/'
    txtFile = '{0}{1}Learning/{1}_{2}_learningTrials.txt'.format(
        dataDir, dtype, group)
    # Materialise the values: numpy/scipy cannot average a dict view.
    lts = list(lt1.values())
    if lts:
        average_trial = np.average(lts)
        standard_error = stats.sem(lts)
    else:
        # Nothing to summarise; record NaN without tripping numpy warnings.
        average_trial = standard_error = float('nan')
    with open(txtFile, 'w') as learn:
        for trial in lts:
            learn.write(str(trial) + '\n')
        learn.write("AVERAGE LEARNING TRIAL: " + str(average_trial) + '\n')
        learn.write("STANDARD ERROR: " + str(standard_error))

    # Function-call form so the module also parses on Python 3.
    print("|||||||||||Completed Analysis for " + group + " data|||||||||||||")
    summary_dataset = np.percentile(trace1['p'], [5, 50, 95], axis=0)
    with open('{0}{1}DATASET.txt'.format(group, anType), 'w') as data:
        data.write(str(np.asarray(summary_dataset)))
    # plot_results(np.asarray(summary_dataset), 3, 2, group)
    return trace1['p']
コード例 #11
0
def main():
    """Run a Bayesian survival analysis of the mastectomy dataset.

    Steps, in order:

    1. Load the ``mastectomy`` dataset and plot censored vs. uncensored
       subjects.
    2. Discretise time into 3-month intervals and build the per-subject
       death and exposure matrices.
    3. Fit a piecewise-constant proportional hazards model as a Poisson
       regression and plot cumulative hazard and survival curves.
    4. Refit with a time-varying coefficient (Gaussian random walk) and plot
       the same quantities for comparison.

    Takes no arguments, returns ``None`` and ends by showing all figures.
    """

    #Load mastectomy dataset
    df = datasets.get_rdataset('mastectomy', 'HSAUR', cache=True).data
    #Change event to integer
    df.event = df.event.astype(np.int64)
    #Change metastized to integer (1 for yes, 0 for no)
    df.metastized = (df.metastized == 'yes').astype(np.int64)
    #Count the number of patients
    n_patients = df.shape[0]
    #Create array for each individual patient
    patients = np.arange(n_patients)

    #Censoring - we do not observe the death of every subject
    #1 - observation is not censored (death was observed)
    #0 - observation is censored (death was not observed)
    #Fraction of uncensored observations (not used again in this function)
    nonCensored = df.event.mean()

    #Create censoring plot
    fig, ax = plt.subplots(figsize=(8, 6))
    blue, _, red = sns.color_palette()[:3]
    #Create horizontal blue lines for censored observations
    ax.hlines(patients[df.event.values == 0],
              0,
              df[df.event.values == 0].time,
              color=blue,
              label='Censored')
    #Create horizontal red lines for uncensored observations
    ax.hlines(patients[df.event.values == 1],
              0,
              df[df.event.values == 1].time,
              color=red,
              label='Uncensored')
    #Create scatter points marking subjects whose cancer metastized
    ax.scatter(df[df.metastized.values == 1].time,
               patients[df.metastized.values == 1],
               color='k',
               zorder=10,
               label='Metastized')
    ax.set_xlim(left=0)
    ax.set_xlabel('Months since mastectomy')
    ax.set_yticks([])
    ax.set_ylabel('Subject')
    ax.set_ylim(-0.25, n_patients + 0.25)
    ax.legend(loc='center right')

    #To understand the impact of metastization on survival time, we use a risk regression model
    #Cox proportional hazards model
    #Make intervals 3 months long
    interval_length = 3
    interval_bounds = np.arange(0,
                                df.time.max() + interval_length + 1,
                                interval_length)
    n_intervals = interval_bounds.size - 1
    intervals = np.arange(n_intervals)
    #Check how deaths and censored observations are distributed in intervals
    fig, ax = plt.subplots(figsize=(8, 6))
    #Plot histogram of uncensored events
    ax.hist(df[df.event == 1].time.values,
            bins=interval_bounds,
            color=red,
            alpha=0.5,
            lw=0,
            label='Uncensored')
    #Plot histogram of censored events
    ax.hist(df[df.event == 0].time.values,
            bins=interval_bounds,
            color=blue,
            alpha=0.5,
            lw=0,
            label='Censored')
    ax.set_xlim(0, interval_bounds[-1])
    ax.set_xlabel('Months since mastectomy')
    ax.set_yticks([0, 1, 2, 3])
    ax.set_ylabel('Number of observations')
    ax.legend()

    #Index of the last interval in which each subject was observed
    #(the 0.01 offset keeps a time exactly on a boundary in the earlier interval)
    last_period = np.floor((df.time - 0.01) / interval_length).astype(int)
    #Creates an empty matrix to store deaths
    death = np.zeros((n_patients, n_intervals))
    #For each patient (row), store the event flag in the last observed interval (column)
    death[patients, last_period] = df.event

    #Create matrix of the amount of time a subject (row) was at risk in an interval (column)
    exposure = np.greater_equal.outer(df.time,
                                      interval_bounds[:-1]) * interval_length
    #In the last observed interval only the time actually spent there counts
    exposure[patients, last_period] = df.time - interval_bounds[last_period]

    #Define parameters for PyMC
    SEED = 5078864
    n_samples = 1000
    n_tune = 1000

    #Create PyMC model -> lambda(t) = lambda0(t) * e ^ (X*beta)
    with pm.Model() as model:
        #Define prior distribution of hazards as vague Gamma distribution
        lambda0 = pm.Gamma('lambda0', 0.01, 0.01, shape=n_intervals)

        #Define hazard regression coefficient (beta) for the metastized covariate with a wide normal prior
        beta = pm.Normal('beta', 0, sd=1000)

        #Create equation for lambda(t) as a deterministic node - record sampled values as part of output
        #T.outer = symbolic matrix, vector-vector outer product
        lambda_ = pm.Deterministic(
            'lambda_', T.outer(T.exp(beta * df.metastized), lambda0))
        #Mu is created from our lambda values (hazard) times patient exposure per interval
        mu = pm.Deterministic('mu', exposure * lambda_)

        #Likelihood: the observed death indicators are Poisson distributed with mean mu
        obs = pm.Poisson('obs', mu, observed=death)

    with model:
        trace = pm.sample(n_samples, tune=n_tune, random_seed=SEED)

    pm.traceplot(trace)

    #Hazard ratio exp(mean beta) for metastized subjects (computed but not used below)
    hazardRate = np.exp(trace['beta'].mean())
    pm.plot_posterior(trace, varnames=['beta'], color='#87ceeb')
    pm.autocorrplot(trace, varnames=['beta'])

    #Store base hazard as well as metastized hazard for each sample per interval
    #(sample x number of intervals)
    base_hazard = trace['lambda0']
    met_hazard = trace['lambda0'] * np.exp(np.atleast_2d(trace['beta']).T)

    #Calculate cumulative hazard
    def cum_hazard(hazard):
        return (interval_length * hazard).cumsum(axis=-1)

    #Calculate survival as e^(-cumulative hazard)
    def survival(hazard):
        return np.exp(-cum_hazard(hazard))

    #Plot f of the mean hazard with a shaded interval band
    #NOTE: despite the name, the band is an equal-tailed percentile interval, not a true HPD
    def plot_with_hpd(x, hazard, f, ax, color=None, label=None, alpha=0.05):
        #Use function f on hazard mean
        mean = f(hazard.mean(axis=0))
        #Lower and upper percentiles for a (1 - alpha) interval
        percentiles = 100 * np.array([alpha / 2., 1. - alpha / 2.])
        hpd = np.percentile(f(hazard), percentiles, axis=0)

        ax.fill_between(x, hpd[0], hpd[1], color=color, alpha=0.25)
        ax.step(x, mean, color=color, label=label)

    #Create figure
    fig, (hazard_ax, surv_ax) = plt.subplots(ncols=2,
                                             sharex=True,
                                             sharey=False,
                                             figsize=(16, 6))
    #Plot cumulative hazard with interval band for non-metastized cancer
    plot_with_hpd(interval_bounds[:-1],
                  base_hazard,
                  cum_hazard,
                  hazard_ax,
                  color=blue,
                  label='Had not metastized')
    #Plot cumulative hazard with interval band for metastized cancer
    plot_with_hpd(interval_bounds[:-1],
                  met_hazard,
                  cum_hazard,
                  hazard_ax,
                  color=red,
                  label='Metastized')
    hazard_ax.set_xlim(0, df.time.max())
    hazard_ax.set_xlabel('Months since mastectomy')
    hazard_ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    hazard_ax.legend(loc=2)
    #Plot survival with interval band for non-metastized cancer
    plot_with_hpd(interval_bounds[:-1],
                  base_hazard,
                  survival,
                  surv_ax,
                  color=blue)
    #Plot survival with interval band for metastized cancer
    plot_with_hpd(interval_bounds[:-1],
                  met_hazard,
                  survival,
                  surv_ax,
                  color=red)
    surv_ax.set_xlim(0, df.time.max())
    surv_ax.set_xlabel('Months since mastectomy')
    surv_ax.set_ylabel('Survival function $S(t)$')
    fig.suptitle('Bayesian survival model')

    #Consider time varying effects
    with pm.Model() as time_varying_model:
        lambda0 = pm.Gamma('lambda0', 0.01, 0.01, shape=n_intervals)
        #Beta is now modeled as a normal random walk instead of a normal distribution
        #This is due to the fact that the regression coefficients can vary over time
        beta = GaussianRandomWalk('beta', tau=1., shape=n_intervals)

        #Hazard per subject and interval, recorded in the trace under the name 'h'
        lambda_ = pm.Deterministic(
            'h', lambda0 * T.exp(T.outer(T.constant(df.metastized), beta)))
        mu = pm.Deterministic('mu', exposure * lambda_)

        obs = pm.Poisson('obs', mu, observed=death)

    with time_varying_model:
        time_varying_trace = pm.sample(n_samples,
                                       tune=n_tune,
                                       random_seed=SEED)

    pm.traceplot(time_varying_trace)
    pm.plot_posterior(time_varying_trace, varnames=['beta'], color='#87ceeb')
    pm.forestplot(time_varying_trace, varnames=['beta'])

    #Create plot to show the mean trace of beta
    fig, ax = plt.subplots(figsize=(8, 6))
    #2.5th and 97.5th percentiles of beta per interval
    beta_hpd = np.percentile(time_varying_trace['beta'], [2.5, 97.5], axis=0)
    beta_low = beta_hpd[0]
    beta_high = beta_hpd[1]
    #Fill percentile interval
    ax.fill_between(interval_bounds[:-1],
                    beta_low,
                    beta_high,
                    color=blue,
                    alpha=0.25)
    #Create the mean estimate for beta from trace samples
    beta_hat = time_varying_trace['beta'].mean(axis=0)
    #Plot a stepwise line for beta_hat per interval
    ax.step(interval_bounds[:-1], beta_hat, color=blue)
    #Mark the last observed interval of each metastized subject, split by death vs. censoring
    ax.scatter(interval_bounds[last_period[(df.event.values == 1)
                                           & (df.metastized == 1)]],
               beta_hat[last_period[(df.event.values == 1)
                                    & (df.metastized == 1)]],
               c=red,
               zorder=10,
               label='Died, cancer metastized')
    ax.scatter(interval_bounds[last_period[(df.event.values == 0)
                                           & (df.metastized == 1)]],
               beta_hat[last_period[(df.event.values == 0)
                                    & (df.metastized == 1)]],
               c=blue,
               zorder=10,
               label='Censored, cancer metastized')
    ax.set_xlim(0, df.time.max())
    ax.set_xlabel('Months since mastectomy')
    ax.set_ylabel(r'$\beta_j$')
    ax.legend()

    #Store base and metastized hazards of the time-varying model
    #(beta already has one column per interval, so no transpose is needed here)
    tv_base_hazard = time_varying_trace['lambda0']
    tv_met_hazard = time_varying_trace['lambda0'] * np.exp(
        np.atleast_2d(time_varying_trace['beta']))

    #Plot cumulative hazard functions with and without time-varying effect
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.step(interval_bounds[:-1],
            cum_hazard(base_hazard.mean(axis=0)),
            color=blue,
            label='Had not metastized')
    ax.step(interval_bounds[:-1],
            cum_hazard(met_hazard.mean(axis=0)),
            color=red,
            label='Metastized')
    ax.step(interval_bounds[:-1],
            cum_hazard(tv_base_hazard.mean(axis=0)),
            color=blue,
            linestyle='--',
            label='Had not metastized (time varying effect)')
    ax.step(interval_bounds[:-1],
            cum_hazard(tv_met_hazard.mean(axis=0)),
            color=red,
            linestyle='--',
            label='Metastized (time varying effect)')
    ax.set_xlim(0, df.time.max() - 4)
    ax.set_xlabel('Months since mastectomy')
    ax.set_ylim(0, 2)
    ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    ax.legend(loc=2)

    #Plot cumulative hazard and survival of the time-varying model with interval bands
    fig, (hazard_ax, surv_ax) = plt.subplots(ncols=2,
                                             sharex=True,
                                             sharey=False,
                                             figsize=(16, 6))
    plot_with_hpd(interval_bounds[:-1],
                  tv_base_hazard,
                  cum_hazard,
                  hazard_ax,
                  color=blue,
                  label='Had not metastized')
    plot_with_hpd(interval_bounds[:-1],
                  tv_met_hazard,
                  cum_hazard,
                  hazard_ax,
                  color=red,
                  label='Metastized')
    hazard_ax.set_xlim(0, df.time.max())
    hazard_ax.set_xlabel('Months since mastectomy')
    hazard_ax.set_ylim(0, 2)
    hazard_ax.set_ylabel(r'Cumulative hazard $\Lambda(t)$')
    hazard_ax.legend(loc=2)
    plot_with_hpd(interval_bounds[:-1],
                  tv_base_hazard,
                  survival,
                  surv_ax,
                  color=blue)
    plot_with_hpd(interval_bounds[:-1],
                  tv_met_hazard,
                  survival,
                  surv_ax,
                  color=red)
    surv_ax.set_xlim(0, df.time.max())
    surv_ax.set_xlabel('Months since mastectomy')
    surv_ax.set_ylabel('Survival function $S(t)$')
    fig.suptitle('Bayesian survival model with time varying effects')

    plt.show()

    #NOTE(review): looks like a leftover debug marker printed after the figures close
    print('x')
コード例 #12
0
def bayes_randomwalk():
    """Fit a random-walk regression of GLD on GDX and save two figures.

    Downloads the closing prices of GLD and GDX, lets the regression
    intercept ``alpha`` and slope ``beta`` follow Gaussian random walks (one
    coefficient per 50 consecutive observations), samples the model and
    writes ``bayes8.png`` (coefficient paths over time) and ``bayes9.png``
    (scatter plot with one regression line per segment) below the
    module-level ``PATH``.

    Takes no arguments and returns ``None``.

    NOTE: the original author marked this routine as not compatible with the
    Python 3 version of the libraries; the ``TransformedVar`` /
    ``pm.logtransform`` calls are kept exactly as they were.
    """
    data = pd.DataFrame()
    symbols = ['GLD', 'GDX']
    for sym in symbols:
        data[sym] = web.DataReader(sym, data_source='google')['Close']

    # Dates as matplotlib numbers, computed from the data loaded above so
    # they always match its length (used for the x axis and the colouring).
    mpl_dates = mpl.dates.date2num(data.index.to_pydatetime())

    model_randomwalk = pm.Model()
    with model_randomwalk:
        # std of random walk best sampled in log space
        sigma_alpha, log_sigma_alpha = \
                model_randomwalk.TransformedVar('sigma_alpha',
                                pm.Exponential.dist(1. / .02, testval=.1),
                                pm.logtransform)
        sigma_beta, log_sigma_beta = \
                model_randomwalk.TransformedVar('sigma_beta',
                                pm.Exponential.dist(1. / .02, testval=.1),
                                pm.logtransform)
    # to make the model more simple, we will apply the same coefficients
    # to 50 data points at a time

    subsample_alpha = 50
    subsample_beta = 50
    # Floor division keeps the shapes integral on Python 3 as well.
    n_alpha = len(data) // subsample_alpha
    n_beta = len(data) // subsample_beta
    # Number of observations covered by whole segments.  This replaces the
    # hard-coded 1950, which only matched data of 1950-1999 rows.  Both
    # subsample sizes are equal, so both repeated vectors have this length.
    n_obs = n_alpha * subsample_alpha
    with model_randomwalk:
        alpha = GaussianRandomWalk('alpha',
                                   sigma_alpha**-2,
                                   shape=n_alpha)
        beta = GaussianRandomWalk('beta',
                                  sigma_beta**-2,
                                  shape=n_beta)

        # make coefficients have the same length as prices
        alpha_r = np.repeat(alpha, subsample_alpha)
        beta_r = np.repeat(beta, subsample_beta)
        print(len(data.dropna().GDX.values))  # a bit longer than 1,950

    with model_randomwalk:
        # define regression
        regression = alpha_r + beta_r * data.GDX.values[:n_obs]

        # assume prices are normally distributed,
        # the mean comes from the regression
        sd = pm.Uniform('sd', 0, 20)
        likelihood = pm.Normal('GLD',
                               mu=regression,
                               sd=sd,
                               observed=data.GLD.values[:n_obs])

    with model_randomwalk:
        # first optimize random walk
        start = pm.find_MAP(vars=[alpha, beta], fmin=sco.fmin_l_bfgs_b)

        # sampling
        step = pm.NUTS(scaling=start)
        trace_rw = pm.sample(100, step, start=start, progressbar=False)
    print(np.shape(trace_rw['alpha']))

    alpha_samples = trace_rw['alpha']
    beta_samples = trace_rw['beta']
    alpha_mean = np.mean(alpha_samples, axis=0)
    beta_mean = np.mean(beta_samples, axis=0)

    # One x position per random-walk segment (previously a hard-coded 39).
    first_date, last_date = min(mpl_dates), max(mpl_dates)
    alpha_dates = np.linspace(first_date, last_date, len(alpha_mean))
    beta_dates = np.linspace(first_date, last_date, len(beta_mean))

    fig, ax1 = plt.subplots(figsize=(10, 5))
    plt.plot(alpha_dates,
             alpha_mean,
             'b',
             lw=2.5,
             label='alpha')
    # A few individual draws (samples 45..54) around the mean.
    for path in alpha_samples[45:55]:
        plt.plot(alpha_dates, path, 'b-.', lw=0.75)
    plt.xlabel('date')
    plt.ylabel('alpha')
    plt.axis('tight')
    plt.grid(True)
    plt.legend(loc=2)
    ax1.xaxis.set_major_formatter(mpl.dates.DateFormatter('%d %b %y'))
    ax2 = ax1.twinx()
    plt.plot(beta_dates,
             beta_mean,
             'r',
             lw=2.5,
             label='beta')
    for path in beta_samples[45:55]:
        plt.plot(beta_dates, path, 'r-.', lw=0.75)
    plt.ylabel('beta')
    plt.legend(loc=4)
    fig.autofmt_xdate()
    plt.savefig(PATH + 'bayes8.png', dpi=300)
    plt.close()

    plt.figure(figsize=(10, 5))
    plt.scatter(data['GDX'], data['GLD'], c=mpl_dates, marker='o')
    plt.colorbar(ticks=mpl.dates.DayLocator(interval=250),
                 format=mpl.dates.DateFormatter('%d %b %y'))
    plt.grid(True)
    plt.xlabel('GDX')
    plt.ylabel('GLD')
    x = np.linspace(min(data['GDX']), max(data['GDX']))
    n_lines = min(len(alpha_mean), len(beta_mean))
    for i in range(n_lines):
        # Integer colormap index; a float > 1 would be clipped to the last
        # colour for every line but the first.
        colour = plt.cm.jet(256 * i // n_lines)
        plt.plot(x, alpha_mean[i] + beta_mean[i] * x, color=colour)
    plt.savefig(PATH + 'bayes9.png', dpi=300)
    plt.close()