Example #1
def lpd(count_data, R):
    """ Compute lpd values of the 2 models, keep its difference, its standard deviation and the location of the change point found. """

    N = len(count_data)  # number of data points
    prs = max(count_data) + 10  # prior scale, set deliberately high
    nc = 6  # number of chains (one per processor), set high

    # compute the lpd of the no-change-point model
    lpd_poisson = np.zeros(N)  # list of lpd values of points
    model_poisson_loo = stan_utility.compile_model('STAN/poisson_loo.stan')
    for n in range(N):  # leave one observation out at a time
        d_loo = count_data[n]  # held-out point
        d = np.array([count_data[i] for i in range(N)
                      if i != n])  # remaining data
        fit_data = {"N": N, "d": d, "prs": prs, "d_loo": d_loo, "n_loo": n}
        fit_nocp_loo = model_poisson_loo.sampling(data=fit_data,
                                                  iter=R,
                                                  chains=nc,
                                                  seed=4838282,
                                                  refresh=0)  # fit without the held-out point
        lpd_poisson[n] = logsumexp(fit_nocp_loo["log_lik_d_loo"]) - np.log(
            len(fit_nocp_loo["log_lik_d_loo"]))
    loonochange = np.sum(lpd_poisson)  # total LOO lpd of the no-change model

    # compute the lpd of the change-point model
    lpd_poisson_cp = np.zeros(N)  # list of lpd values of points
    model_poisson_loo_cp = stan_utility.compile_model(
        'STAN/poisson_cp_loo.stan')
    for n in range(N):  # leave one observation out at a time
        d_loo = count_data[n]  # held-out point
        d = np.array([count_data[i] for i in range(N)
                      if i != n])  # remaining data
        fit_data = {
            "N": N,
            "d": d,
            "pes": prs,
            "pls": prs,
            "d_loo": d_loo,
            "n_loo": n
        }
        fit_cp_loo = model_poisson_loo_cp.sampling(data=fit_data,
                                                   iter=R,
                                                   chains=nc,
                                                   seed=4838282,
                                                   refresh=0)
        lpd_poisson_cp[n] = logsumexp(fit_cp_loo["log_lik_d_loo"]) - np.log(
            len(fit_cp_loo["log_lik_d_loo"]))
    loochange = np.sum(lpd_poisson_cp)  # total LOO lpd of the change-point model

    #compute standard deviation of the difference of lpd values
    diff = lpd_poisson_cp - lpd_poisson
    sigma = np.sqrt(N * st.pvariance(diff))

    #look for change point location
    cpdistrib = cpsearch(count_data, R)
    cp = np.argmax(cpdistrib)

    return (
        loochange - loonochange, sigma, cp
    )  #lpd value difference, its standard deviation, and change point location
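A minimal usage sketch (hypothetical driver code; the file name and the value of R are placeholders): the lpd difference read against its standard deviation gives a rough z-score in favour of the change-point model.

import numpy as np

# Hypothetical driver; `counts` is a placeholder array of event counts.
counts = np.loadtxt('counts.txt')
elpd_diff, sigma, cp = lpd(counts, R=2000)
print(f"elpd difference (cp - no cp): {elpd_diff:.2f} +/- {sigma:.2f}")
print(f"most probable change point:  {cp}")
if elpd_diff > 2 * sigma:  # crude decision rule, not from the source
    print("change-point model is clearly preferred")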
Example #2
 def __init__(self,
              dgp_model_name,
              fit_model_name,
              sampler_args,
              stats=[],
              seed=SEED):
     self.fit_model_name = fit_model_name
     self.fit_model = stan_utility.compile_model(fit_model_name)
     self.dgp_model_name = dgp_model_name
     self.dgp_model = stan_utility.compile_model(dgp_model_name)
     self.sampler_args = sampler_args
     self.stats = self.default_stats + stats
     self.seed = seed
Example #3
 def __init__(self,
              X: np.ndarray,
              y: List[Tuple[int, float]],
              yc: List[List[Tuple[int, int]]],
              kernel: GPy.kern.Kern,
              likelihood: Gaussian,
              posterior_samples: int = 45,
              get_logger: Callable = None):
     super(MCMCComparisonGP, self).__init__(name="MCMC")
     self.N, self.D = X.shape[0], X.shape[1]
     self.output_dim = 1  ## hard coded
     self.posterior_samples = posterior_samples
     self.X = X
     self.y = y
     self.yc = yc
     self.noise_std = np.sqrt(likelihood.variance[0])
     self.sigma2s = (likelihood.variance[0]) * np.ones(
         (X.shape[0], 1), dtype=int)
     self.variance = kernel.variance[0]
     self.lengthscale = np.array([kernel.lengthscale[:]]).flatten()
     self.kern = kernel
     self.posterior = None
     if not os.path.exists(stan_utility.file_utils.get_path_of_cache()):
         os.makedirs(stan_utility.file_utils.get_path_of_cache())
     self.model = stan_utility.compile_model(os.path.join(
         os.path.dirname(__file__), "inferences/sexpgp_comparison.stan"),
                                             model_name='comparison_model')
     self.get_logger = get_logger
     self.parameters_changed()
Example #4
def test_compile_file():
    import stan_utility.cache
    with tempfile.TemporaryDirectory() as cachedir:
        print("using cachedir:", cachedir)
        stan_utility.cache.path = cachedir
        stan_utility.cache.mem = joblib.Memory(cachedir, verbose=False)
        
        import stan_utility

        model = stan_utility.compile_model(os.path.join(os.path.dirname(__file__), 'test.stan'))
        data = dict(
            mean=1,
            unused=np.random.normal(size=(4,42)),
        )
        stan_utility.sample_model(model, data, chains=2)

        files = os.listdir(stan_utility.cache.get_path())
        assert "joblib" in files
        assert any(f for f in files if f.startswith("cached-") and f.endswith('.pkl')), files
        assert len(files) > 1, files

        stan_utility.cache.clear()

        files = os.listdir(stan_utility.cache.get_path())
        assert files == ["joblib"], files
Example #5
def cpsearch(count_data, R):
    """ Return the probability density function of the change point over the list of rates count_data"""

    N = len(count_data)  # number of data points
    prs = max(count_data) + 10  # prior scale, set deliberately high
    nc = 6  # number of chains (one per processor), set high
    fit_data = {
        "N": N,
        "d": count_data,
        "pes": prs,
        "pls": prs
    }  # data passed to the sampler
    model_cp = stan_utility.compile_model('STAN/poisson_cp_ppc.stan')
    fit_cp = model_cp.sampling(
        data=fit_data, iter=R, chains=nc, seed=4838282,
        refresh=0)  # fit the change point model to the data
    #convert fitting result to PDF curve
    bins = np.linspace(1, N, N + 1)
    counts = [
        np.histogram(x, bins=bins)[0] for x in fit_cp["cp"][np.newaxis, :]
    ]
    probs = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    counts = [c / (R * 3) for c in counts]
    creds = [
        np.percentile([count[b] for count in counts], probs) for b in range(N)
    ]

    proba = [c[4] for c in creds]  # median (50th percentile) in each bin
    return proba
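A short usage sketch (hypothetical data; the argmax rule mirrors how lpd() uses cpsearch above):

import numpy as np

# Hypothetical driver; `counts` is a placeholder array of event counts.
counts = np.loadtxt('counts.txt')
density = cpsearch(counts, R=2000)
cp = np.argmax(density)  # most probable change-point location
print(f"change point at index {cp} (density {density[cp]:.3f})")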
Example #6
def plot_nocp(count_data, R):
    """ Fit the no change point model on the data and plot the result. """

    N = len(count_data)  # number of data points
    prs = max(count_data) + 10  # prior scale, set deliberately high
    nc = 6  # number of chains (one per processor), set high

    #fitting
    fit_data = {"N": N, "d": count_data, "prs": prs}
    model_nocp = stan_utility.compile_model('STAN/poisson_ppc.stan')
    fit_nocp = model_nocp.sampling(data=fit_data,
                                   iter=R,
                                   chains=nc,
                                   seed=4838282,
                                   refresh=0)  # fit the no-change model (cf. plot_cp)

    # posterior predictive distribution
    Xs = fit_nocp['d_ppc']
    cumsums = np.array([np.cumsum(x) for x in Xs])
    probs = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    creds = [np.percentile(xt, probs) for xt in cumsums.T]
    idxs = [idx for idx in range(Xs.shape[1]) for r in range(2)]
    xs = [
        idx + delta for idx in range(1, Xs.shape[1] + 1)
        for delta in [-0.5, 0.5]
    ]
    pad_creds = [creds[idx] for idx in idxs]

    #plotting
    plt.figure()
    plt.fill_between(xs, [c[0] for c in pad_creds], [c[8] for c in pad_creds],
                     facecolor=light,
                     color=light,
                     label='80% predictive distribution')
    plt.fill_between(xs, [c[1] for c in pad_creds], [c[7] for c in pad_creds],
                     facecolor=light_highlight,
                     color=light_highlight,
                     label='60%')
    plt.fill_between(xs, [c[2] for c in pad_creds], [c[6] for c in pad_creds],
                     facecolor=mid,
                     color=mid,
                     label='40%')
    plt.fill_between(xs, [c[3] for c in pad_creds], [c[5] for c in pad_creds],
                     facecolor=mid_highlight,
                     color=mid_highlight,
                     label='20%')
    plt.plot(xs, [c[4] for c in pad_creds],
             color=dark,
             label='model',
             linewidth=1)
    plt.legend(bbox_to_anchor=(0.025, 1.06),
               loc=2,
               borderaxespad=0.,
               fontsize=12)
    plt.gca().set_xlim([0, Xs.shape[1]])
    plt.gca().set_xlabel("Data point", size=15)
    plt.xticks(fontsize=15)
    plt.gca().set_ylim([0, max([c[8] for c in creds])])
    plt.gca().set_ylabel("Cumulative number of events",
                         size=15,
                         color='crimson')
    plt.yticks(fontsize=12)
    plt.show()
    return
Example #7
    def __init__(self, stan_sim_file, include_paths):
        """
        Handles posterior predictive checks.
        :param stan_sim_file: the stan file to use to run the simulation
        :param include_paths: paths to search for files included by the stan model
        """

        # compile the stan model
        self.simulation = stan_utility.compile_model(
            filename=stan_sim_file,
            model_name='ppc_sim',
            include_paths=include_paths)

        self.arrival_direction_preds = []
        self.Edet_preds = []
        self.Nex_preds = []
        self.labels_preds = []
Example #8
def run_stan_sim(N, Eth_sim, alpha, D, Eth, f_E, sim_filename):
    """
    Run the Stan simulation for N events above Eth_sim from distance D
    and return the fraction above Eth.
    
    :param N: Number of UHECRs to simulate.
    :param Eth_sim: The minimum energy of UHECR generated in the simulation.
    :param alpha: The spectral index of the source UHECR (power law spectrum).
    :param D: The distance at which a shell of sources is to be placed.
    :param Eth: The threshold energy of a UHECR sample.
    :param f_E: Fractional energy uncertainty
    :param sim_filename: The filename of the Stan simulation code.
    
    :return: The detection probability for Edet > Eth and Earr > Eth.
    """

    # Run the simulation.
    sim_input = {
        'N': N,
        'alpha': alpha,
        'Eth_sim': Eth_sim,
        'D': D,
        'f_E': f_E
    }
    sim = stan_utility.compile_model(filename=sim_filename,
                                     model_name='uhecr_E_loss',
                                     include_paths=stan_path)
    sim_output = sim.sampling(data=sim_input,
                              iter=1,
                              chains=1,
                              algorithm="Fixed_param")

    # Extract the output.
    E = sim_output.extract(['E'])['E'][0]
    Earr = sim_output.extract(['Earr'])['Earr'][0]
    Edet = sim_output.extract(['Edet'])['Edet'][0]

    # Count number above threshold
    N_arr_gt_Eth = np.shape(np.where(Earr > Eth))[1]
    N_det_gt_Eth = np.shape(np.where(Edet > Eth))[1]

    return (N_arr_gt_Eth / N), (N_det_gt_Eth / N)
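A minimal driver sketch (every parameter value below is a hypothetical placeholder; sim_filename must point at the Stan simulation code): loop over source distances and collect both detection fractions, as the fragment that follows does at scale.

# Hypothetical values for illustration only.
Ds = [10.0, 50.0, 100.0]  # placeholder source distances
Parr, Pdet = [], []
for D in Ds:
    p_arr, p_det = run_stan_sim(N=1000, Eth_sim=52.0, alpha=2.0, D=D,
                                Eth=57.0, f_E=0.12,
                                sim_filename='uhecr_E_loss.stan')
    Parr.append(p_arr)
    Pdet.append(p_det)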
Example #9
        f.create_dataset('Eth', data=Eth)
        f.create_dataset('Eth_sim', data=Eth_sim)
        f.create_dataset('alpha', data=alpha)
        f.create_dataset('Ncr', data=Ncr)
        f.create_dataset('Ntrials', data=Ntrials)
        f.create_dataset('Ds', data=Ds)
        f.create_dataset('f_E', data=f_E)

        # Initialise
        f.create_dataset('Parr', (len(Ds), ), 'f')
        f.create_dataset('Pdet', (len(Ds), ), 'f')
        f.create_dataset('D', (len(Ds), ), 'f')

    # Compile the Stan model
    sim = stan_utility.compile_model(filename=sim_filename,
                                     model_name='uhecr_E_loss',
                                     include_paths=stan_path)

    done = False

# Start loop over Ds
for i, d in enumerate(Ds):

    if COMM.rank == 0:

        #print('D:', d)

        start_time = time.time()

        Eth = Eth
        Ncr = Ncr
Example #10
def plot_cp(count_data, R):
    """ Fit the change point model to the data and plot the result. """

    N = len(count_data)  # number of data points
    prs = max(count_data) + 10  # prior scale, set deliberately high
    nc = 6  # number of chains (one per processor), set high

    #fitting
    fit_data = {"N": N, "d": count_data, "pes": prs, "pls": prs}
    model_cp = stan_utility.compile_model('STAN/poisson_cp_ppc.stan')
    fit_cp = model_cp.sampling(data=fit_data,
                               iter=R,
                               chains=nc,
                               seed=4838282,
                               refresh=0)

    #probability density function
    bins = np.linspace(1, N, N + 1)
    counts = [
        np.histogram(x, bins=bins)[0] for x in fit_cp["cp"][np.newaxis, :]
    ]
    probs = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    counts = [c / (R * 3) for c in counts]
    creds = [
        np.percentile([count[b] for count in counts], probs) for b in range(N)
    ]
    proba = [c[4] for c in creds]  # median change-point density in each bin

    # posterior predictive distribution
    Xs = fit_cp['d_ppc']
    cumsums = np.array([np.cumsum(x) for x in Xs])
    probs = [10, 20, 30, 40, 50, 60, 70, 80, 90]
    creds = [np.percentile(xt, probs) for xt in cumsums.T]
    idxs = [idx for idx in range(Xs.shape[1]) for r in range(2)]
    xs = [
        idx + delta for idx in range(1, Xs.shape[1] + 1)
        for delta in [-0.5, 0.5]
    ]
    pad_creds = [creds[idx] for idx in idxs]

    #plotting
    plotline = np.cumsum(count_data)
    fig, ax1 = plt.subplots()
    ax1.fill_between(xs, [c[0] for c in pad_creds], [c[8] for c in pad_creds],
                     facecolor=light,
                     color=light,
                     label='80% predictive distribution')
    ax1.fill_between(xs, [c[1] for c in pad_creds], [c[7] for c in pad_creds],
                     facecolor=light_highlight,
                     color=light_highlight,
                     label='60%')
    ax1.fill_between(xs, [c[2] for c in pad_creds], [c[6] for c in pad_creds],
                     facecolor=mid,
                     color=mid,
                     label='40%')
    ax1.fill_between(xs, [c[3] for c in pad_creds], [c[5] for c in pad_creds],
                     facecolor=mid_highlight,
                     color=mid_highlight,
                     label='20%')
    ax1.plot(xs, [c[4] for c in pad_creds], color=dark, label='model', lw=1)
    ax1.plot(range(1, len(plotline) + 1), plotline, c='k', lw=2, label='data')
    ax1.set_xlabel('Data point', size=15)
    ax1.set_ylabel('Cumulative number of events', color='crimson', size=15)
    for tl in ax1.get_yticklabels():
        tl.set_color('crimson')
    ax2 = ax1.twinx()
    ax2.plot(proba, color='navy', linewidth=1)
    ax2.set_ylim([0, 0.7])
    ax2.set_ylabel('Probability Density Function of the change point',
                   color="navy",
                   size=12)
    for tl in ax2.get_yticklabels():
        tl.set_color("navy")
    ax1.legend(bbox_to_anchor=(0.025, 1.06),
               loc=2,
               borderaxespad=0.,
               fontsize=12)
    plt.xticks(fontsize=15)
    plt.show()
    return
Example #11
mid_highlight="#A25050"
dark="#8F2727"
dark_highlight="#7C0000"
green="#00FF00"

############################################################
#
# One-dimensional
#
############################################################

############################################################
# Create data
############################################################

model = stan_utility.compile_model('generate_data.stan')
fit = model.sampling(seed=194838, algorithm='Fixed_param', iter=1, chains=1)

data = dict(N = fit.extract()['N'].astype(numpy.int64),
            x_obs = fit.extract()['x_obs'][0,:])

pystan.stan_rdump(data, 'selection.data.R')

############################################################
# Fit model
############################################################

data = pystan.read_rdump('selection.data.R')

model = stan_utility.compile_model('selection.stan')
fit = model.sampling(data=data, chains=4, seed=4938483)
Example #12
def do_run(xpred, num_datapoints, num_feedbacks, acquisition, seed,
           save_folder, stan_folder):
    filename = save_folder + str(xpred).replace('.', '_') + '-' + str(seed)
    tr_data, beta_true = generate_data(N=num_datapoints, seed=seed)
    print('xpred: ' + str(xpred))
    # fit model on training data
    dat = {
        'n': num_datapoints,
        'npred': 2,
        'x': tr_data[0],
        'a': tr_data[1],
        'y': tr_data[2],
        'xpred': np.array([xpred, xpred]),
        'apred': np.array([0, 1])
    }
    # Compute imbalance
    imbalance = [compute_imbalance(dat['x'], dat['a'])]
    # Fit model
    model = stan_utility.compile_model(
        'logit2.stan',
        model_name='logit-' + str(xpred).replace('.', '_') + '-' + str(seed),
        model_path=stan_folder)
    fit = model.sampling(data=dat, seed=194838, chains=4, iter=2000)
    samples = fit.extract(permuted=True)
    # Compute the estimated Type S error rate
    typeSerror = [error_rate(samples)]
    # Compute decision and regret in current model
    a0 = decide(samples)
    decisions = [a0]
    regrets = [regret(beta_true, xpred, a0)]
    x_s, a_s, y_s = [], [], []
    for it in range(num_feedbacks):
        print('it: ' + str(it))

        # Elicit one feedback with different criterion
        x_star, a_star = select_query(model, samples, dat, acquisition)

        # Acquire feedback from oracle (we assume true model)
        y_star = predict_with_model(beta_true, x_star, a_star)
        # Fit new model, compute decisions and regret
        x_s += [x_star]
        a_s += [a_star]
        y_s += [y_star]
        dat = append_dat_stan(dat, x_star, a_star, y_star)
        # Compute imbalance
        imbalance += [compute_imbalance(dat['x'], dat['a'])]
        # Re-fit the model
        fit = model.sampling(data=dat, seed=194838, chains=4, iter=2000)
        samples = fit.extract(permuted=True)

        typeSerror += [error_rate(samples)]
        a1 = decide(samples)
        decisions += [a1]
        regrets += [regret(beta_true, xpred, a1)]
    print(filename)
    dat_save = {
        'regrets': regrets,
        'x_s': x_s,
        'a_s': a_s,
        'y_s': y_s,
        'imbalances': imbalance,
        'typeSerrors': typeSerror,
        'decisions': decisions
    }
    pickle.dump(dat_save, open(filename + ".p", "wb"))
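A hypothetical invocation (every argument value below is a placeholder; the acquisition string must be one of the criteria handled by select_query):

# Placeholder arguments for illustration only.
do_run(xpred=0.5,
       num_datapoints=50,
       num_feedbacks=10,
       acquisition='random',  # hypothetical criterion name
       seed=1234,
       save_folder='results/',
       stan_folder='stan/')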
Example #13
  for (j in 1:J)
    z_u[j] ~ normal(p_r[1],p_r[2]);

  //likelihood
  for (n in 1:N)
    mu[n] = alpha + X_beta[n] * beta + Z_u[n] * u[subj[n]];
  y ~ normal(mu, sigma);
}

generated quantities {
  matrix[n_u,n_u] Cor_u;
  vector[N] log_lik;
  vector[N] y_hat;              // predicted y
  real raw_intercept;           //raw intercept if log values
  vector[P-1] raw_beta;         //raw effect size if log values
  Cor_u = tcrossprod(L_u);      //Correlations between random effects by subj
  if (logT == 1) {
    raw_intercept = exp(alpha);
    raw_beta = exp(alpha + beta) - raw_intercept;
  }
  for (n in 1:N){
    log_lik[n] = normal_lpdf(y[n] | alpha + X_beta[n] * beta + Z_u[n] * u[subj[n]], sigma);
    y_hat[n] = normal_rng(alpha + X_beta[n] * beta + Z_u[n] * u[subj[n]], sigma);
  }
}
"""
f = open("LME.stan", "w+")
f.write(LME)
f.close()
LME = stan_utility.compile_model('LME.stan', model_name="LME")
Example #14
import stan_utility
import pystan
import numpy as np
model = stan_utility.compile_model('gbm_sum.stan', model_name='test_gbm2')

data = pystan.read_rdump('alpha_data.R')

N_gen_spectra = 100
model_energy = np.logspace(0,5,N_gen_spectra)
data['N_gen_spectra'] = N_gen_spectra
data['model_energy'] = model_energy

warmup = 1000
iter = 100

total = warmup + iter

chains = 8

fit = model.sampling(
    data=data,
    iter=total,
    warmup=warmup,
    chains=chains,
    n_jobs=chains,
    control=dict(max_treedepth=13,
    #             adapt_delta=0.9
    ),
    seed=1234)

stan_utility.stanfit_to_hdf5(fit, '/data/jburgess/stan_fits/gbm_stan_fit_small.h5')
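The saved fit can be inspected afterwards with h5py; a minimal sketch (the internal layout written by stan_utility.stanfit_to_hdf5 is not shown above, so no dataset names are assumed):

import h5py

# List every group and dataset stored in the file.
with h5py.File('/data/jburgess/stan_fits/gbm_stan_fit_small.h5', 'r') as f:
    f.visit(print)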
Example #15
def run_linear_model(model_src, plot_title, save_loc):
    """
    Run linear model.
    :param model_src: source of stan model.
    :param plot_title: title of output plot.
    :param save_loc: location of output plot.
    :return:
    """
    n = data.shape[0]
    m = 5
    x = data[['BTC price']].values.flatten()[0:n]
    y1 = data[['DASH price']].values.flatten()[0:n]
    y2 = data[['LTC price']].values.flatten()[0:n]
    y3 = data[['ETH price']].values.flatten()[0:n]
    y4 = data[['ETC price']].values.flatten()[0:n]
    p = np.linspace(data[['BTC price']].min(), data[['BTC price']].max(), m)
    print(p)

    model = stan_utility.compile_model(model_src)

    data1 = dict(N=n, M=m, x=x, y=y1, xpreds=p)
    fit1 = model.sampling(data=data1, seed=74749)
    samples1 = fit1.extract(permuted=True)
    print_loo_and_ks(samples1)

    data2 = dict(N=n, M=m, x=x, y=y2, xpreds=p)
    fit2 = model.sampling(data=data2, seed=74749)
    samples2 = fit2.extract(permuted=True)
    print_loo_and_ks(samples2)

    data3 = dict(N=n, M=m, x=x, y=y3, xpreds=p)
    fit3 = model.sampling(data=data3, seed=74749)
    samples3 = fit3.extract(permuted=True)
    print_loo_and_ks(samples3)

    data4 = dict(N=n, M=m, x=x, y=y4, xpreds=p)
    fit4 = model.sampling(data=data4, seed=74749)
    samples4 = fit4.extract(permuted=True)
    print_loo_and_ks(samples4)

    f, axes = plt.subplots(2, 2, figsize=(14, 10))
    preds = samples1['ypreds'].T
    ax = axes[0, 0]
    ax.scatter(data['BTC price'], data['DASH price'], alpha=0.5)
    ax.set_ylabel('DASH price')
    ax.set_xlabel('BTC price')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for i in range(m):
        ax.scatter([p[i]] * len(preds[i]), preds[i], alpha=0.1, c='g')
        ax.scatter(p[i], np.mean(preds[i]), c='r')

    preds = samples2['ypreds'].T
    ax = axes[0, 1]
    ax.scatter(data['BTC price'], data['LTC price'], alpha=0.5)
    ax.set_ylabel('LTC price')
    ax.set_xlabel('BTC price')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for i in range(m):
        ax.scatter([p[i]] * len(preds[i]), preds[i], alpha=0.1, c='g')
        ax.scatter(p[i], np.mean(preds[i]), c='r')

    preds = samples3['ypreds'].T
    ax = axes[1, 0]
    ax.scatter(data['BTC price'], data['ETH price'], alpha=0.5)
    ax.set_ylabel('ETH price')
    ax.set_xlabel('BTC price')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for i in range(m):
        ax.scatter([p[i]] * len(preds[i]), preds[i], alpha=0.1, c='g')
        ax.scatter(p[i], np.mean(preds[i]), c='r')

    preds = samples4['ypreds'].T
    ax = axes[1, 1]
    ax.scatter(data['BTC price'], data['ETC price'], alpha=0.5)
    ax.set_ylabel('ETC price')
    ax.set_xlabel('BTC price')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    for i in range(m):
        ax.scatter([p[i]] * len(preds[i]), preds[i], alpha=0.1, c='g')
        ax.scatter(p[i], np.mean(preds[i]), c='r')

    red_patch = mpatches.Patch(color='red')
    green_patch = mpatches.Patch(color='green')
    f.legend([red_patch, green_patch], ['mean', 'spread'],
             loc='upper right',
             ncol=1)
    f.suptitle(plot_title, fontsize=14)
    plt.savefig(save_loc)
    plt.show()
Example #16
data['zcmb'] = np.array(jla_data_set['zcmb'])

data['c_obs'] = np.array(jla_data_set['color'])
data['c_sigma'] = np.array(jla_data_set['dcolor'])

data['x1_obs'] = np.array(jla_data_set['x1'])
data['x1_sigma'] = np.array(jla_data_set['dx1'])

data['m_obs'] = np.array(jla_data_set['mb'])
data['m_sigma'] = np.array(jla_data_set['dmb'])
data['N_model'] = n_model
data['z_model'] = z_model

n_warmup = 1000
n_samp = 250
iter = n_warmup + n_samp
n_chain = 4

model = stan_utility.compile_model('supernove_model.stan', 'sn_cosmo')

fit = model.sampling(data=data,
                     iter=iter,
                     warmup=n_warmup,
                     chains=n_chain,
                     n_jobs=n_chain,
                     thin=1,
                     seed=1234,
                     control=dict(max_treedepth=13, adapt_delta=0.95))

stan_utility.stanfit_to_hdf5(fit, 'sncosmo_fit.h5')
Example #17
##    generate fake data according to the data-generating process described by
##    your model.
## 2. Follow the rest of this workflow with your fake data!
#############
# To use Stan to generate fake data, we use the `generated quantities` block
# and the 'Fixed_param' algorithm. The following snippet will generate
# 200 fake data points.
# gen.model = stan_model("gen.stan")
# gen.data = sampling(data=d, algorithm='Fixed_param', warmup=0, iter=200)
# gendf = as.data.frame(gen.data)
# dataset.1 = gendf[1,]

#############
## Fit the model
#############
model = util.compile_model("../pooled.stan")
fit = model.sampling(dict(log_radon=df.log_radon,
                          basement = df.basement,
                          N=df.shape[0]))
# #############
# ## Check diagnostics
# #############
util.check_all_diagnostics(fit)

# #############
# ## Check & graph fit
# #############
print(fit)
sample = fit.extract()

# # This line just plots our data by basement indicator
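A hedged sketch of that plot (assuming df carries the log_radon and basement columns used in the sampling call above):

# Hypothetical plotting code; column names follow the sampling call above.
import matplotlib.pyplot as plt
plt.scatter(df.basement, df.log_radon, alpha=0.3)
plt.xlabel('basement indicator')
plt.ylabel('log radon')
plt.show()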
Example #18
data['x1_obs'] = np.array(jla_data_set['x1'])
data['x1_sigma'] = np.array(jla_data_set['dx1'])

data['m_obs'] = np.array(jla_data_set['mb'])
data['m_sigma'] = np.array(jla_data_set['dmb'])
data['N_model'] = n_model
data['z_model'] = z_model


n_warmup=1000
n_samp = 250
iter = n_warmup + n_samp
n_chain = 4


model = stan_utility.compile_model('supernovae_simple.stan', 'sn_cosmo_simple')


fit = model.sampling(
    data=data,
    iter=iter,
    warmup=n_warmup,
    chains=n_chain,
    n_jobs=n_chain,
    thin=1,
    seed=1234,
    control=dict(max_treedepth=13, adapt_delta=0.95))

stan_utility.stanfit_to_hdf5(fit, 'sncosmo_simple_fit.h5')
Example #19
data['h0'] = 70.
data['zcmb'] = np.array(jla_data_set['zcmb'])

data['c_obs'] = np.array(jla_data_set['color'])
data['c_sigma'] = np.array(jla_data_set['dcolor'])

data['x1_obs'] = np.array(jla_data_set['x1'])
data['x1_sigma'] = np.array(jla_data_set['dx1'])

data['m_obs'] = np.array(jla_data_set['mb'])
data['m_sigma'] = np.array(jla_data_set['dmb'])

n_warmup = 1000
n_samp = 250
iter = n_warmup + n_samp
n_chain = 4

model = stan_utility.compile_model('supernove_model_bias.stan',
                                   'sn_cosmo_bias')

fit = model.sampling(data=data,
                     iter=iter,
                     warmup=n_warmup,
                     chains=n_chain,
                     n_jobs=n_chain,
                     thin=1,
                     seed=1234,
                     control=dict(max_treedepth=13, adapt_delta=0.85))

stan_utility.stanfit_to_hdf5(fit, 'sncosmo_bias_fit.h5')
Example #20
    for m in range(M):
        running_samples = x0[0:iters[m]]
        mcmc_stats = compute_mcmc_stats(running_samples)
        x1_mean[m] = mcmc_stats[0]
        x1_se[m] = mcmc_stats[1]

    return iters, x1_mean, x1_se


############################################################
# Normal Model
############################################################

# Compile Stan program and fit with dynamic Hamiltonian Monte Carlo
model = stan_utility.compile_model('normal.stan')
fit = model.sampling(seed=4838282)

# Check diagnostics
stan_utility.check_all_diagnostics(fit)

# Check MCMC estimators
print(fit)

############################################################
# Student-t Model
############################################################

model = stan_utility.compile_model('student_t.stan')

# 100 degrees of freedom
Example #21
                  facecolor=mid_highlight,
                  color=mid_highlight)
plot.plot(xs, [c[4] for c in pad_creds], color=dark)

plot.gca().set_xlim([min(bins), max(bins)])
plot.gca().set_xlabel("y")
plot.gca().set_ylim([0, max([c[8] for c in creds])])
plot.gca().set_ylabel("Prior Predictive Distribution")

plot.axvline(x=25, linewidth=2.5, color="white")
plot.axvline(x=25, linewidth=2, color="black")

plot.show()
float(len([y for y in simu_ys.flatten() if y > 25])) / len(simu_ys.flatten())
simus = zip(simu_lambdas, simu_ys)
fit_model = stan_utility.compile_model('fit_data.stan')


def analyze_simu(simu):
    simu_l = simu[0]
    simu_y = simu[1]

    # Fit the simulated observation
    input_data = dict(N=N, y=simu_y)

    fit = fit_model.sampling(data=input_data, seed=4938483, n_jobs=1)

    # Compute diagnostics
    warning_code = stan_utility.check_all_diagnostics(fit, quiet=True)

    # Compute rank of prior draw with respect to thinned posterior draws