def create_test_case():
    """Generate a small random sim/obs data set and save it for comparison.

    The raw and standardized arrays are written to
    ``data/test_case_matlab.mat``; the SepiaData object and the model returned
    by ``setup_model`` are pickled to ``data/test_case_python_model.pkl``.
    """

    def _random_curves(n_rows, grid):
        # One normal draw per row for each term, taken in this order:
        # quadratic coefficient, linear coefficient, constant offset.
        quad = np.random.normal(0, 1, (n_rows, 1))
        slope = np.random.normal(0, 1, (n_rows, 1))
        offset = np.random.normal(0, 1, (n_rows, 1))
        return (10 * quad * (grid[None, :] - 50)**2 / 75.
                + 20 * slope * grid[None, :] + 20 * offset)

    n_obs, n_sim = 2, 5
    p, q = 3, 4
    ell_sim, ell_obs = 80, 20

    # Simulation inputs and outputs
    t = np.random.uniform(0, 1, (n_sim, q))
    x = 0.5 * np.ones((n_sim, p))
    y_ind = np.linspace(0, 100, ell_sim)
    y = _random_curves(n_sim, y_ind)

    # Observation inputs and outputs
    x_obs = 0.5 * np.ones((n_obs, p))
    y_obs_ind = np.linspace(10, 85, ell_obs)
    y_obs = _random_curves(n_obs, y_obs_ind)

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_obs_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=3)
    data.create_D_basis('constant')

    # Save as matfile for testing in matlab
    savedict = {
        't': t,
        'y': y,
        'y_obs': y_obs,
        'D': data.obs_data.D,
        'Kobs': data.obs_data.K,
        'Ksim': data.sim_data.K,
        'y_obs_std': data.obs_data.y_std,
        'y_sim_std': data.sim_data.y_std,
        'y_sd': data.sim_data.orig_y_sd,
    }
    scipy.io.savemat('data/test_case_matlab.mat', savedict)

    g = setup_model(data)

    # Save pickle file of results
    savedict = {'model': g, 'data': data}
    with open('data/test_case_python_model.pkl', 'wb') as f:
        pickle.dump(savedict, f)
def setup_multi_sim_and_obs_sharedtheta(m=100, n=10, nt_sim=20, nt_obs=15,
                                        noise_sd=0.1, nx=5, n_pc=10, seed=42.,
                                        n_lik=0, n_mcmc=0, n_pred=0,
                                        n_shared=2, clist=None, fix_K=False):
    """Run the MATLAB shared-theta setup and build one SepiaModel per data set.

    All arguments except ``fix_K`` are forwarded to the MATLAB function
    ``setup_multi_sim_and_obs_sharedtheta``; ``n_pc`` is also the number of
    principal components for each Python K basis and ``n_shared`` the number
    of models built.

    :param clist: list passed to MATLAB as ``matlab.double``; ``None`` (the
        default) is treated as an empty list.
    :param fix_K: accepted for signature parity with the other setup helpers;
        not used by this function.
    :return: tuple ``(model_list, res)`` -- the SepiaModel instances and the
        result returned by MATLAB.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    # Avoid a shared mutable default argument.
    if clist is None:
        clist = []
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_sharedtheta(
            m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc,
            n_pred, n_shared, matlab.double(clist), nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)  # (m, nt_sim, n_shared)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()  # (nt_sim, n_shared)
    xt = np.array(res['xt'], dtype=float)  # (m, nx, n_shared)
    # NOTE(review): the original comment gave (n, nt_sim, n_shared) here;
    # presumably the middle axis is nt_obs -- confirm against the MATLAB code.
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()  # (nt_obs, n_shared)
    x_obs = np.array(res['x_obs'], dtype=float)  # (n, 1, n_shared)

    model_list = []
    for i in range(n_shared):
        # First column of xt is x; the remaining columns are t.
        data = SepiaData(x_sim=xt[:, 0, i][:, None],
                         t_sim=xt[:, 1:, i],
                         y_sim=y[:, :, i],
                         y_ind_sim=y_ind[:, i],
                         x_obs=x_obs[:, :, i],
                         y_obs=y_obs[:, :, i],
                         y_ind_obs=y_ind_obs[:, i])
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc)
        model_list.append(SepiaModel(data))
    return model_list, res
def test_sim_only_x_only(self):
    """Smoke-test a sim-only model with x inputs only and a 50-column y.

    Runs the data, model and plotting helpers, then calls every getter on the
    emulator prediction and on the cross-validation emulator prediction.
    """
    m = 20
    x = np.random.uniform(-1, 3, (m, 3))
    y = np.random.normal(size=(m, 50))
    y_ind = np.linspace(0, 1, 50)

    data = SepiaData(x_sim=x, y_sim=y, y_ind_sim=y_ind)
    call_data_methods(data, discrep=False)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    samples = model.get_samples()

    pred = SepiaEmulatorPrediction(x_pred=x, samples=samples, model=model,
                                   addResidVar=True, storeMuSigma=True)
    pred.get_w()
    pred.get_y()
    pred.get_y(std=True)
    pred.get_mu_sigma()

    xpred = SepiaXvalEmulatorPrediction(samples=samples, model=model,
                                        addResidVar=True, storeMuSigma=True)
    xpred.get_w()
    xpred.get_y()
    # Previously this line called pred.get_y(std=True), so the standardized
    # output of the cross-validation prediction was never exercised.
    xpred.get_y(std=True)
    xpred.get_mu_sigma()
def test_sim_only_x_and_t(self):
    """Smoke-test a sim-only model with x and t inputs and a single-column y."""

    def _exercise(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_runs = 20
    x = np.random.uniform(-1, 3, (n_runs, 3))
    t = np.random.uniform(-1, 3, (n_runs, 2))
    y = np.random.normal(size=(n_runs, 1))

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y)
    call_data_methods(data)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by both prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise(SepiaEmulatorPrediction(x_pred=x, t_pred=t, **common))
    _exercise(SepiaXvalEmulatorPrediction(**common))
def test_sim_and_obs_noD_x_and_t(self):
    """Smoke-test a sim+obs model (no discrepancy) with x and t inputs."""

    def _exercise_emulator(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_sim, n_obs = 20, 10
    x = np.random.uniform(-1, 3, (n_sim, 3))
    x2 = np.random.uniform(-1, 3, (n_obs, 3))
    t = np.random.uniform(-1, 3, (n_sim, 2))
    y = np.random.normal(size=(n_sim, 50))
    y_ind = np.linspace(0, 1, 50)
    y2 = np.random.normal(size=(n_obs, 20))
    y_ind2 = np.linspace(0, 1, 20)

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y, y_ind_sim=y_ind,
                     x_obs=x2, y_obs=y2, y_ind_obs=y_ind2)
    call_data_methods(data, discrep=False)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by all prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise_emulator(SepiaEmulatorPrediction(x_pred=x, t_pred=t, **common))
    _exercise_emulator(SepiaXvalEmulatorPrediction(**common))

    full = SepiaFullPrediction(x_pred=x, t_pred=t, **common)
    full.get_u_v()
    full.get_mu_sigma()
    for getter in (full.get_ysim, full.get_yobs):
        getter()
        getter(as_obs=True)
        getter(as_obs=True, std=True)
def test_sim_and_obs_noD_t_only_cat(self):
    """Smoke-test a sim+obs model (no discrepancy) with t inputs only.

    The last t column is categorical, taking values 1..3; it is declared
    through ``t_cat_ind``. Every getter of the emulator, cross-validation and
    full prediction objects is called once.
    """
    m = 20
    n = 10
    t = np.concatenate([
        np.random.uniform(-1, 3, (m, 3)),
        1 + np.random.choice(3, size=(m, 1))
    ], axis=1)
    y = np.random.normal(size=(m, 50))
    y_ind = np.linspace(0, 1, 50)
    y2 = np.random.normal(size=(n, 20))
    y_ind2 = np.linspace(0, 1, 20)
    cat_inds = [0, 0, 0, 3]

    data = SepiaData(t_sim=t, y_sim=y, y_ind_sim=y_ind, y_obs=y2,
                     y_ind_obs=y_ind2, t_cat_ind=cat_inds)
    call_data_methods(data, discrep=False)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    samples = model.get_samples()

    pred = SepiaEmulatorPrediction(t_pred=t, samples=samples, model=model,
                                   addResidVar=True, storeMuSigma=True)
    pred.get_w()
    pred.get_y()
    pred.get_y(std=True)
    pred.get_mu_sigma()

    xpred = SepiaXvalEmulatorPrediction(samples=samples, model=model,
                                        addResidVar=True, storeMuSigma=True)
    xpred.get_w()
    xpred.get_y()
    # Previously this line called pred.get_y(std=True), so the standardized
    # output of the cross-validation prediction was never exercised.
    xpred.get_y(std=True)
    xpred.get_mu_sigma()

    pred = SepiaFullPrediction(t_pred=t, samples=samples, model=model,
                               addResidVar=True, storeMuSigma=True)
    pred.get_u_v()
    pred.get_mu_sigma()
    pred.get_ysim()
    pred.get_ysim(as_obs=True)
    pred.get_ysim(as_obs=True, std=True)
    pred.get_yobs()
    pred.get_yobs(as_obs=True)
    pred.get_yobs(as_obs=True, std=True)
def setup_multi_sim_and_obs_noD(m=100, n=10, nt_sim=20, nt_obs=15,
                                noise_sd=0.1, nx=5, n_pc=10, seed=42.,
                                n_lik=0, n_mcmc=0):
    """Run the MATLAB ``setup_multi_sim_and_obs_noD`` and build the SepiaModel.

    All arguments are forwarded to the MATLAB function; ``n_pc`` is also the
    number of principal components for the Python K basis. No D basis is
    created here.

    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_noD(m, n, nt_sim, nt_obs, noise_sd,
                                              nx, n_pc, seed, n_lik, n_mcmc,
                                              nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)

    # First column of xt is x; the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y,
                     y_ind_sim=y_ind, x_obs=x_obs, y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    print(data)

    model = SepiaModel(data)
    return model, res
def setup_univ_sim_only(m=300, seed=42., n_lik=0, n_mcmc=0, n_pred=0,
                        n_lev=0, n_burn=0, sens=0):
    """Run the MATLAB ``setup_univ_sim_only`` and build the matching SepiaModel.

    All arguments are forwarded unchanged to the MATLAB function.

    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_only(m, seed, n_lik, n_mcmc, n_pred, n_lev,
                                      n_burn, sens, nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)

    # Column 0 of xt is used as x, column 1 as t.
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None],
                     y_sim=y)
    print(data)
    data.standardize_y()
    data.transform_xt()

    model = SepiaModel(data)
    return model, res
def setup_univ_sim_and_obs(m=100, n=50, seed=42., n_lik=0, n_mcmc=0,
                           n_pred=0):
    """Run the MATLAB ``setup_univ_sim_and_obs`` and build the SepiaModel.

    All arguments are forwarded unchanged to the MATLAB function; ``n`` is
    also used to reshape the returned ``x_obs`` into an ``(n, 1)`` column.

    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_and_obs(m, n, seed, n_lik, n_mcmc, n_pred,
                                         nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    x_obs = np.array(res['x_obs'], dtype=float).reshape((n, 1))

    # Column 0 of xt is used as x, column 1 as t.
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None],
                     y_sim=y, x_obs=x_obs, y_obs=y_obs)
    data.standardize_y()
    data.transform_xt()
    print(data)

    model = SepiaModel(data)
    return model, res
def test_univariate_sim_only_setup(self):
    """
    Tests setup for univariate sim only model
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'],
                  y_sim=self.data_dict['y_sim'])
    print('Testing univariate sim-only SepiaModelSetup...', flush=True)
    print(d, flush=True)

    # Build one model from an untouched copy (no explicit standardization or
    # transform) and one from the explicitly transformed data.
    model_notrans = setup_model(copy.deepcopy(d))
    d.transform_xt()
    d.standardize_y()
    model = setup_model(d)

    # Both routes must yield the same transformed quantities.
    for field in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
        self.assertTrue(
            np.allclose(getattr(model_notrans.data.sim_data, field),
                        getattr(model.data.sim_data, field)))

    # Model dimensions and flags.
    num = model.num
    self.assertTrue(num.scalar_out)
    self.assertTrue(num.sim_only)
    expected_dims = (('m', 100), ('n', 0), ('p', 1), ('q', 1), ('pu', 1),
                     ('pv', 0))
    for attr, value in expected_dims:
        self.assertTrue(getattr(model.num, attr) == value)
    self.assertTrue(np.allclose(model.num.w, model.data.sim_data.y_std))

    # Parameter setup: (name, value shape, prior distribution, MCMC step type).
    expected_params = (
        ('betaU', (2, 1), 'Beta', 'BetaRho'),
        ('lamUz', (1, 1), 'Gamma', 'PropMH'),
        ('lamWOs', (1, 1), 'Gamma', 'PropMH'),
        ('lamWs', (1, 1), 'Gamma', 'PropMH'),
    )
    for name, shape, prior_dist, step_type in expected_params:
        param = getattr(model.params, name)
        self.assertTrue(param.val_shape == shape)
        self.assertTrue(param.prior.dist == prior_dist)
        self.assertTrue(param.mcmc.stepType == step_type)

    sampled = {p.name for p in model.params.mcmcList}
    self.assertTrue(sampled == {'betaU', 'lamUz', 'lamWOs', 'lamWs'})
def test_multivariate_sim_and_obs_ragged_setup(self):
    """Check that model setup runs when each observation has its own index grid."""

    def _true_basis(grid):
        # Three basis curves evaluated on `grid`, stacked as rows.
        return np.vstack([
            0.5 * (np.sin(grid) + 1),
            np.square(-grid + 50) / 2500,
            grid / 100,
        ])

    m = 700  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)
    ell_sim = 1000  # dimension of y output sim
    pu = 3  # number of PCs
    q = 2  # dimension of t (extra sim inputs)
    n = 5  # number of observed observations
    ell_obs = np.random.randint(100, 600, n)  # a different length per observation

    # Per-component scaling applied to the random basis weights.
    weight_scale = 2 * np.array([1, 0.5, 0.2])[:, None]

    # Simulations on a common grid
    y_ind_sim = np.linspace(0, 100, ell_sim)
    K_true_sim = _true_basis(y_ind_sim)
    sim_weights = weight_scale * np.random.normal(0, 1, (pu, m))
    y_sim = (np.log(1 + y_ind_sim)[:, None]
             + np.dot(K_true_sim.T, sim_weights)).T
    x_sim = 0.5 * np.random.uniform(-1, 3, (m, p))
    t = np.random.uniform(-10, 10, (m, q))

    # Observations: jittered grids, clipped below at zero
    y_ind_obs = []
    for length in ell_obs:
        grid = np.linspace(0, 100, length) + np.random.uniform(-3, 3, length)
        grid[grid < 0] = 0
        y_ind_obs.append(grid)

    K_true_obs = [_true_basis(grid) for grid in y_ind_obs]
    y_obs = []
    for grid, basis in zip(y_ind_obs, K_true_obs):
        obs_weights = weight_scale * np.random.normal(0, 1, (pu, 1))
        y_obs.append(10 + np.squeeze(
            np.log(1 + grid)[:, None] + np.dot(basis.T, obs_weights)))
    x_obs = 0.5 * np.random.uniform(-1, 3, (n, p))

    d = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=t, y_ind_sim=y_ind_sim,
                  x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    model = setup_model(d)
def test_univariate_sim_only_x_only_cat_ind(self):
    """
    Tests setup for univariate sim only where we only use an x input, not t, and use x_cat_ind
    """
    m = 200  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)

    # Two continuous columns followed by one categorical column (values 1..4).
    continuous = 0.5 * np.random.uniform(-1, 3, (m, p - 1))
    categorical = np.random.choice(range(1, 5), (m, 1), replace=True)
    x = np.concatenate([continuous, categorical], axis=1)
    y = 5 * np.random.normal(0, 1, m) + 2

    d = SepiaData(x_sim=x, y_sim=y, x_cat_ind=[0, 0, 4])
    print('Testing univariate sim-only SepiaData...')
    print(d)
    d.transform_xt()

    # The categorical column must come through the transform unchanged.
    cat_column = d.sim_data.x_trans[:, 2]
    self.assertTrue(np.allclose(cat_column, x[:, 2]))
    self.assertEqual(np.min(cat_column), 1)
    self.assertEqual(np.max(cat_column), 4)
def test_sim_only_x_and_t_cat(self):
    """Smoke-test a sim-only model whose x and t each end in a categorical column."""

    def _exercise(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_runs = 20
    x = np.concatenate([
        np.random.uniform(-1, 3, (n_runs, 3)),
        1 + np.random.choice(3, size=(n_runs, 1)),
    ], axis=1)
    t = np.concatenate([
        np.random.uniform(-1, 3, (n_runs, 2)),
        1 + np.random.choice(4, size=(n_runs, 1)),
    ], axis=1)
    y = np.random.normal(size=(n_runs, 50))
    y_ind = np.linspace(0, 1, 50)

    data = SepiaData(x_sim=x, t_sim=t,
                     x_cat_ind=[0, 0, 0, 3], t_cat_ind=[0, 0, 4],
                     y_sim=y, y_ind_sim=y_ind)
    call_data_methods(data, discrep=False)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by both prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise(SepiaEmulatorPrediction(x_pred=x, t_pred=t, **common))
    _exercise(SepiaXvalEmulatorPrediction(**common))
def test_multivariate_sim_and_obs_lik(self):
    """
    Tests log lik for multivariate sim and obs model
    """
    d = SepiaData(t_sim=self.multi_data_dict['t_sim'],
                  y_sim=self.multi_data_dict['y_sim'],
                  y_ind_sim=self.multi_data_dict['y_ind_sim'],
                  y_obs=self.multi_data_dict['y_obs'],
                  y_ind_obs=self.multi_data_dict['y_ind_obs'])
    # The message previously said "sim-only", copied from the sim-only test;
    # this test builds a sim and obs model.
    print('Testing multivariate sim and obs SepiaLogLik...', flush=True)
    print(d, flush=True)

    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(5)
    d.create_D_basis('linear')
    model = setup_model(d)

    # Full log likelihood, then the log likelihood with each element of each
    # sampled parameter named as the changed variable.
    model.logLik()
    for param in model.params.mcmcList:
        for cindex in range(int(np.prod(param.val_shape))):
            model.logLik(cvar=param.name, cindex=cindex)
def test_univariate_sim_only_t_only_cat_ind(self):
    """
    Tests setup for univariate sim only where we only use a t input, not x; x is set up as a dummy internally.
    Use t_cat_ind.
    """
    m = 200  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)

    # Two continuous columns followed by one categorical column (values 1..4).
    continuous = 0.5 * np.random.uniform(-1, 3, (m, p - 1))
    categorical = np.random.choice(range(1, 5), (m, 1), replace=True)
    t = np.concatenate([continuous, categorical], axis=1)
    y = 5 * np.random.normal(0, 1, m) + 2

    d = SepiaData(x_sim=None, y_sim=y, t_sim=t, t_cat_ind=[0, 0, 4])
    print('Testing univariate sim-only SepiaData...')
    print(d)
    d.transform_xt()

    # The categorical column must come through the transform unchanged.
    cat_column = d.sim_data.t_trans[:, 2]
    self.assertTrue(np.allclose(cat_column, t[:, 2]))
    self.assertEqual(np.min(cat_column), 1)
    self.assertEqual(np.max(cat_column), 4)
def setup_multi_sim_only(m=300, nt=20, nx=5, n_pc=10, seed=42., n_lik=0,
                         n_mcmc=0, n_pred=0, fix_K=False, sens=0):
    """Run the MATLAB ``setup_multi_sim_only`` and build the matching SepiaModel.

    All arguments except ``fix_K`` are forwarded to the MATLAB function;
    ``n_pc`` is also the number of principal components for the Python K
    basis.

    :param fix_K: if True, overwrite the Python simulation K basis with the
        transposed ``K`` returned by MATLAB.
    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_only(m, nt, nx, n_pc, seed, n_lik, n_mcmc,
                                       n_pred, sens, nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)

    # First column of xt is x; the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y,
                     y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    if fix_K:
        data.sim_data.K = np.array(res['K']).T
    print(data)

    model = SepiaModel(data)
    return model, res
def test_sim_and_obs_t_only(self):
    """Smoke-test a sim+obs model with t inputs only and single-column outputs."""

    def _exercise_emulator(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_sim, n_obs = 20, 10
    t = np.random.uniform(-1, 3, (n_sim, 3))
    y = np.random.normal(size=(n_sim, 1))
    y2 = np.random.normal(size=(n_obs, 1))

    data = SepiaData(t_sim=t, y_sim=y, y_obs=y2)
    call_data_methods(data)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by all prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise_emulator(SepiaEmulatorPrediction(t_pred=t, **common))
    _exercise_emulator(SepiaXvalEmulatorPrediction(**common))

    full = SepiaFullPrediction(t_pred=t, **common)
    full.get_u_v()
    full.get_mu_sigma()
    for getter in (full.get_ysim, full.get_yobs):
        getter()
        getter(as_obs=True)
        getter(as_obs=True, std=True)
def test_sim_only_x_only_cat(self):
    """Smoke-test a sim-only model with x inputs only, the last one categorical."""

    def _exercise(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_runs = 20
    x = np.concatenate([
        np.random.uniform(-1, 3, (n_runs, 3)),
        1 + np.random.choice(3, size=(n_runs, 1)),
    ], axis=1)
    y = np.random.normal(size=(n_runs, 1))

    data = SepiaData(x_sim=x, y_sim=y, x_cat_ind=[0, 0, 0, 3])
    call_data_methods(data)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by both prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise(SepiaEmulatorPrediction(x_pred=x, **common))
    _exercise(SepiaXvalEmulatorPrediction(**common))
def test_univariate_sim_only_lik(self):
    """
    Tests log lik for univariate sim only model
    """
    sources = self.univ_data_dict
    d = SepiaData(t_sim=sources['t_sim'], y_sim=sources['y_sim'])
    print('Testing univariate sim-only SepiaLogLik...', flush=True)
    print(d, flush=True)

    d.transform_xt()
    d.standardize_y()
    model = setup_model(d)

    # Full log likelihood first, then once per element of each sampled
    # parameter.
    model.logLik()
    for param in model.params.mcmcList:
        n_elements = int(np.prod(param.val_shape))
        for element in range(n_elements):
            model.logLik(cvar=param.name, cindex=element)
def setup_neddermeyer(seed=42., n_mcmc=100, sens=1, n_burn=0, n_lev=0):
    """Run the MATLAB ``setup_neddermeyer`` and build the Python SepiaModel.

    The Python model is built from the pickled dictionary at
    ``../../examples/Neddermeyer/pkls/nedderDataDict.pkl``; its standardized
    outputs and K/D bases are taken from that dictionary rather than
    recomputed.

    All arguments are forwarded unchanged to the MATLAB function.

    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_neddermeyer(seed, n_mcmc, sens, n_burn, n_lev)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # `res` is part of the return value; surface the real error instead of
        # failing at the return statement on the unbound `res`.
        raise

    # get python model
    import pickle
    # NOTE: pickle.load executes arbitrary code; only use this with the
    # trusted data file shipped alongside the examples.
    with open('../../examples/Neddermeyer/pkls/nedderDataDict.pkl',
              'rb') as f:
        data_dict = pickle.load(f)

    data = SepiaData(x_sim=data_dict['x_sim'],
                     t_sim=data_dict['t_sim'],
                     y_sim=data_dict['y_sim'],
                     y_ind_sim=data_dict['y_ind_sim'],
                     x_obs=data_dict['x_obs'],
                     y_obs=data_dict['y_obs'],
                     y_ind_obs=data_dict['y_ind_obs'])
    data.transform_xt()

    # set orig sim vals and Ksim
    data.sim_data.orig_y_mean = data_dict['sim_orig_y_mean']
    data.sim_data.orig_y_sd = data_dict['sim_orig_y_sd']
    data.sim_data.y_std = data_dict['sim_y_std']
    data.sim_data.K = data_dict['Ksim']

    # set orig obs vals and Kobs
    data.obs_data.orig_y_mean = data_dict['obs_orig_y_mean']
    data.obs_data.orig_y_sd = data_dict['obs_orig_y_sd']
    data.obs_data.y_std = data_dict['obs_orig_y_std']
    data.obs_data.K = data_dict['Kobs']

    # call create_D_basis
    data.create_D_basis(D_obs=data_dict['Dobs'], D_sim=data_dict['Dsim'])

    model = SepiaModel(data)
    return model, res
#%autoreload 2 #%% seed = 42 # random seed m = 100 # number of simulated observations n = 1 # number of observed data sig_n = 0.01 # observation noise SD data_dict = generate_multi_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed) #%% data = SepiaData(t_sim=data_dict['t_sim'], y_sim=data_dict['y_sim'], y_ind_sim=data_dict['y_ind_sim'], y_obs=data_dict['y_obs'], y_ind_obs=data_dict['y_ind_obs']) print(data) plt.plot(data.sim_data.y_ind, data.sim_data.y.T) plt.plot(data.obs_data.y_ind, data.obs_data.y.T, 'k.', linewidth=3) plt.title('Synthetic data (obs. in black)') plt.xlabel('y index') plt.ylabel('y') plt.show() #%% data.transform_xt()
# Keep four features from each of the three simulation/observation sets and
# stack them side by side; the simulated values are scaled by 10000.
sel_features = [3, 5, 7, 9]
y_sim = 10000 * np.concatenate([
    sim_s104[:, sel_features], sim_s105[:, sel_features],
    sim_s106[:, sel_features]
], axis=1)
# NOTE(review): y_obs is not multiplied by 10000 here -- presumably the
# observations are already on that scale; confirm where obs_s10x are loaded.
y_obs = np.concatenate(
    [obs_s104[sel_features], obs_s105[sel_features],
     obs_s106[sel_features]])[None, :]  # single observation as a row vector
n_features = y_obs.shape[1]
y_ind = np.arange(1, n_features + 1)  # 1-based feature index shared by sim and obs

# Set up sepia model
data = SepiaData(t_sim=design,
                 y_sim=y_sim,
                 y_ind_sim=y_ind,
                 y_obs=y_obs,
                 y_ind_obs=y_ind)
data.standardize_y()
data.transform_xt()
data.create_K_basis(n_features - 1)
print(data)

# Setup model
# We have a known observation error
# Diagonal matrix with entries 0.01 * y_obs divided by the squared original
# simulation SD.
Sigy = np.diag(
    np.squeeze(
        (0.01 * np.ones(n_features) * y_obs) / data.sim_data.orig_y_sd**2))
model = SepiaModel(data, Sigy)

# Modify priors to match Matlab
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5,
          sig_n=0.1, seed=42):
    """Create the five prepared SepiaData fixtures used by the tests.

    Univariate and multivariate synthetic data are generated once, then
    packaged as sim-only and sim+obs data sets (the multivariate ones with a
    5-component K basis, and one additionally with a linear D basis).
    """

    def _prepare(n_pc=None, D_type=None, **data_kwargs):
        # Build a SepiaData, apply the standard transforms, and optionally add
        # the K and D bases.
        d = SepiaData(**data_kwargs)
        d.transform_xt()
        d.standardize_y()
        if n_pc is not None:
            d.create_K_basis(n_pc)
        if D_type is not None:
            d.create_D_basis(D_type)
        return d

    multi = generate_data.generate_multi_sim_and_obs(
        m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
        n_basis=n_basis, sig_n=sig_n, seed=seed)
    univ = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n,
                                                   seed=seed)

    # Keyword groups reused across the fixtures.
    univ_sim = dict(t_sim=univ['t_sim'], y_sim=univ['y_sim'])
    multi_sim = dict(t_sim=multi['t_sim'], y_sim=multi['y_sim'],
                     y_ind_sim=multi['y_ind_sim'])
    multi_obs = dict(y_obs=multi['y_obs'], y_ind_obs=multi['y_ind_obs'])

    self.univ_sim_only_data = _prepare(**univ_sim)
    self.univ_sim_and_obs_data = _prepare(y_obs=univ['y_obs'], **univ_sim)
    self.multi_sim_only_data = _prepare(n_pc=5, **multi_sim)
    self.multi_sim_and_obs_noD_data = _prepare(n_pc=5, **multi_sim,
                                               **multi_obs)
    self.multi_sim_and_obs_data = _prepare(n_pc=5, D_type='linear',
                                           **multi_sim, **multi_obs)
# field data R = data_dict['R'] # radii of balls .1,.2,.4 (m) h_field = data_dict['h_field'] # observed heights 5,10,15,20 (m) y_field = data_dict['y_field'] # observed times # sim data sim_design = data_dict['sim_design'] R_sim = sim_design[:, 0] C_sim = sim_design[:, 1] h_sim = data_dict['h_sim'] y_sim = data_dict['y_sim'] data = SepiaData(x_sim=np.reshape(R_sim, (len(R_sim), 1)), t_sim=np.reshape(C_sim, (len(C_sim), 1)), y_sim=y_sim, y_ind_sim=h_sim, x_obs=np.reshape(R, (len(R), 1)), y_obs=y_field, y_ind_obs=h_field) data.transform_xt() data.standardize_y() data.create_K_basis(2) # Generate D matrix with normal kernels D_grid = h_sim # locations on which the kernels are centered D_width = 1.5 # width of each kernel pv = len(D_grid) D_obs = np.zeros(shape=(data.obs_data.y_ind.shape[0], pv)) D_sim = np.zeros(shape=(data.sim_data.y_ind.shape[0], pv)) h_dense = data_dict['h_dense'] D_dense = np.zeros(shape=(h_dense.shape[0], pv))
# NOTE(review): `i` is not defined anywhere in this section -- presumably this
# call is the tail of a plotting loop that starts above; confirm its
# indentation against that loop.
inset_ax.axvline(x=R[i], ymin=0, ymax=1, color='k', linewidth=.5)
plt.show()

#%%
# Preparing the data for Sepia
# To use Sepia, we must package our data into a SepiaData object. In this
# example, the known inputs to the simulator are simply the vector of radii
# R_sim. We pass this into x_sim as a column vector. Similarly, C_sim is passed
# into t_sim as a column vector and is the parameter to be calibrated. We
# also pass in y_sim, the simulated time-height curves, and h_sim, the heights
# associated with those times in y_sim. For the observed data, x_obs gets the
# experimental radii R, and y_obs gets the experimental time-height curves
# generated from (1). Finally, we pass in the heights at which the experimental
# time observations were taken, y_ind_obs = h_field.
data = SepiaData(x_sim = np.reshape(R_sim,(len(R_sim),1)),\
                 t_sim = np.reshape(C_sim,(len(C_sim),1)), \
                 y_sim = y_sim, y_ind_sim = h_sim,\
                 x_obs = np.reshape(R,(len(R),1)), y_obs = y_field,\
                 y_ind_obs=h_field)

#%%
### Transforming x, t, and y
# Sepia requires that the inputs $x,t$ lie in the interval $[0,1]^{p+q}$,
# and that the responses $y_{sim},y_{obs}$ be $N(0,1)$.
data.transform_xt()
data.standardize_y()

#%%
### Generate K and D bases
# Sepia models multivariate observations and responses using a linear basis.
# These *principal components*, or scaled eigenvectors, are computed by the
# singular value decomposition.
data.create_K_basis(2)
data.plot_K_basis()
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5,
          sig_n=0.1, seed=42):
    """Build three univariate and three multivariate models plus ``shared_idx``.

    Each pass of the loop generates data with the same arguments (including
    ``seed``), prepares a univariate sim+obs data set and a multivariate one
    with a 5-component K basis and a constant D basis; the models are created
    once all data sets exist.
    """
    n_shared = 3
    self.shared_idx = np.array([[1, 1, 1], [2, -1, 2]])

    univ_data_list = []
    multi_data_list = []
    for _ in range(n_shared):
        multi = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
            n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ = generate_data.generate_univ_sim_and_obs(m=m, n=n,
                                                       sig_n=sig_n,
                                                       seed=seed)

        # Univariate sim+obs data
        univ_data = SepiaData(t_sim=univ['t_sim'], y_sim=univ['y_sim'],
                              y_obs=univ['y_obs'])
        univ_data.transform_xt()
        univ_data.standardize_y()
        univ_data_list.append(univ_data)

        # Multivariate sim+obs data with K and D bases
        multi_data = SepiaData(t_sim=multi['t_sim'],
                               y_sim=multi['y_sim'],
                               y_ind_sim=multi['y_ind_sim'],
                               y_obs=multi['y_obs'],
                               y_ind_obs=multi['y_ind_obs'])
        multi_data.transform_xt()
        multi_data.standardize_y()
        multi_data.create_K_basis(5)
        multi_data.create_D_basis('constant')
        multi_data_list.append(multi_data)

    self.univ_model_list = list(map(SepiaModel, univ_data_list))
    self.multi_model_list = list(map(SepiaModel, multi_data_list))
def test_multivariate_sim_and_obs_lamVzGroups_setup(self):
    """
    Tests setup for multivariate sim and obs model with D and lamVzGroups
    """
    src = self.data_dict
    d = SepiaData(t_sim=src['t_sim'],
                  y_sim=src['y_sim'],
                  y_ind_sim=src['y_ind_sim'],
                  y_obs=src['y_obs'],
                  y_ind_obs=src['y_ind_obs'])
    print(
        'Testing multivariate sim and obs SepiaModelSetup with discrep...',
        flush=True)
    print(d, flush=True)

    # Do explicit transformation
    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(n_pc=5)

    # Custom discrepancy basis: constant, linear and quadratic in the
    # observation index, grouped as [0, 1, 1].
    custom_D = np.vstack([
        np.ones(d.obs_data.y.shape[1]),
        d.obs_data.y_ind,
        d.obs_data.y_ind**2,
    ])
    d.create_D_basis(D_obs=custom_D)
    model = setup_model(d, lamVzGroup=[0, 1, 1])

    # Check model components are set up as expected
    self.assertTrue(not model.num.scalar_out)
    self.assertTrue(not model.num.sim_only)
    expected_dims = (('m', 100), ('n', 1), ('p', 1), ('q', 3), ('pu', 5),
                     ('pv', 3))
    for attr, value in expected_dims:
        self.assertTrue(getattr(model.num, attr) == value)

    # Check parameter setup:
    # (name, value shape, prior distribution, MCMC step type)
    expected_params = (
        ('betaU', (model.num.q + model.num.p, model.num.pu), 'Beta',
         'BetaRho'),
        ('betaV', (1, 2), 'Beta', 'BetaRho'),
        ('lamUz', (1, model.num.pu), 'Gamma', 'PropMH'),
        ('lamVz', (1, 2), 'Gamma', 'PropMH'),
        ('lamWOs', (1, 1), 'Gamma', 'PropMH'),
        ('lamWs', (1, model.num.pu), 'Gamma', 'PropMH'),
        ('lamOs', (1, 1), 'Gamma', 'PropMH'),
        ('theta', (1, model.num.q), 'Normal', 'Uniform'),
    )
    for name, shape, prior_dist, step_type in expected_params:
        param = getattr(model.params, name)
        self.assertTrue(param.val_shape == shape)
        self.assertTrue(param.prior.dist == prior_dist)
        self.assertTrue(param.mcmc.stepType == step_type)

    # theta must keep its original [0, 1] range.
    theta = model.params.theta
    self.assertTrue(np.allclose(theta.orig_range[0], 0))
    self.assertTrue(np.allclose(theta.orig_range[1], 1))

    sampled = {p.name for p in model.params.mcmcList}
    self.assertTrue(sampled == {
        'betaU', 'betaV', 'lamUz', 'lamVz', 'lamWOs', 'lamWs', 'lamOs',
        'theta'
    })
def test_sim_and_obs_x_and_t_cat(self):
    """Smoke-test a sim+obs model whose x and t each end in a categorical column."""

    def _with_category(n_rows, n_continuous, n_levels):
        # Continuous uniform columns followed by one categorical column with
        # values 1..n_levels (uniform draw first, then the category draw).
        return np.concatenate([
            np.random.uniform(-1, 3, (n_rows, n_continuous)),
            1 + np.random.choice(n_levels, size=(n_rows, 1)),
        ], axis=1)

    def _exercise_emulator(predictor):
        # Call each emulator getter once, in a fixed order.
        predictor.get_w()
        predictor.get_y()
        predictor.get_y(std=True)
        predictor.get_mu_sigma()

    n_sim, n_obs = 20, 10
    x = _with_category(n_sim, 3, 3)
    x2 = _with_category(n_obs, 3, 3)
    t = _with_category(n_sim, 2, 4)
    y = np.random.normal(size=(n_sim, 50))
    y_ind = np.linspace(0, 1, 50)
    y2 = np.random.normal(size=(n_obs, 20))
    y_ind2 = np.linspace(0, 1, 20)

    data = SepiaData(x_sim=x, t_sim=t,
                     x_cat_ind=[0, 0, 0, 3], t_cat_ind=[0, 0, 4],
                     y_sim=y, y_ind_sim=y_ind,
                     x_obs=x2, y_obs=y2, y_ind_obs=y_ind2)
    call_data_methods(data)

    model = SepiaModel(data)
    call_model_methods(model)
    call_plot_functions(model)

    # Options shared by all prediction objects.
    common = dict(samples=model.get_samples(), model=model,
                  addResidVar=True, storeMuSigma=True)

    _exercise_emulator(SepiaEmulatorPrediction(x_pred=x, t_pred=t, **common))
    _exercise_emulator(SepiaXvalEmulatorPrediction(**common))

    full = SepiaFullPrediction(x_pred=x, t_pred=t, **common)
    full.get_u_v()
    full.get_mu_sigma()
    for getter in (full.get_ysim, full.get_yobs, full.get_discrepancy):
        getter()
        getter(as_obs=True)
        getter(as_obs=True, std=True)
matfile = scipy.io.loadmat('%s/data/multi_sim_and_obs_mcmc_test.mat' % script_path) except Exception as e: print(e) print('make sure matlab.engine installed') y_sim = matfile['y'].T y_ind_sim = matfile['y_ind'].squeeze() xt_sim = matfile['x'] y_obs = matfile['y_obs'] y_ind_obs = matfile['y_ind_obs'].squeeze() x_obs = matfile['x_obs'] data = SepiaData(x_sim=xt_sim[:, 0][:, None], t_sim=xt_sim[:, 1][:, None], y_sim=y_sim, y_ind_sim=y_ind_sim, x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs) data.standardize_y() data.transform_xt() data.create_K_basis(n_pc=2) data.create_D_basis(D_obs=matfile['Dobs'].T) print(data) model = setup_model(data) nsamp = int(matfile['nsamp']) nburn = int(matfile['nburn'])
def setup_multi_sim_and_obs(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1,
                            nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0,
                            n_pred=0, fix_K=False):
    """Run the MATLAB ``setup_multi_sim_and_obs`` and build the SepiaModel.

    All arguments except ``fix_K`` are forwarded to the MATLAB function;
    ``n_pc`` is also the number of principal components for the Python K
    basis. A constant D basis is always created.

    :param fix_K: if True, pass the transposed ``K`` returned by MATLAB to
        ``create_K_basis`` instead of letting it compute its own.
    :return: tuple ``(model, res)`` -- the SepiaModel and the MATLAB result.
    :raises Exception: whatever the MATLAB engine raised, after printing a
        hint.
    """
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs(m, n, nt_sim, nt_obs, noise_sd, nx,
                                          n_pc, seed, n_lik, n_mcmc, n_pred,
                                          nargout=1)
        eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result nothing below can run; surface the real error
        # instead of failing later on the unbound `res`.
        raise

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)

    # First column of xt is x; the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y,
                     y_ind_sim=y_ind, x_obs=x_obs, y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    if fix_K:
        # means use the K from matlab - avoid issues with positive/negative
        # component ambiguity
        data.create_K_basis(n_pc, K=np.array(res['K']).T)
    else:
        data.create_K_basis(n_pc)
    data.create_D_basis('constant')
    print(data)

    model = SepiaModel(data)
    return model, res