def test_univariate_sim_only_x_only(self):
    """
    Univariate, simulation-only SepiaData built from x inputs alone (no t).

    Walks through input rescaling, the no-op and columnwise y
    standardizations, and finishes by calling the K and D basis builders.
    """
    n_sim = 700  # number of simulated observations
    x_dim = 3  # dimension of x (simulation inputs)
    # x is drawn before y so the global RNG stream is consumed in the same order
    sim_inputs = 0.5 * np.random.uniform(-1, 3, (n_sim, x_dim))
    sim_outputs = 5 * np.random.normal(0, 1, n_sim) + 2

    d = SepiaData(x_sim=sim_inputs, y_sim=sim_outputs, t_sim=None)
    print('Testing univariate sim-only SepiaData...')
    print(d)

    # No observations were supplied, and y is a scalar per run
    self.assertTrue(d.obs_data is None)
    self.assertTrue(d.sim_only)
    self.assertTrue(d.scalar_out)

    # Default transform: every x column should span exactly [0, 1]
    d.transform_xt()
    for reducer, bound in ((np.min, 0), (np.max, 1)):
        self.assertTrue(np.all(reducer(d.sim_data.x_trans, 0) == bound))

    # With centering and scaling both off, standardization is the identity
    d.standardize_y(center=False, scale=False)
    sim = d.sim_data
    self.assertEqual(sim.orig_y_sd, 1)
    self.assertEqual(sim.orig_y_mean, 0)
    self.assertTrue(np.allclose(sim.y, sim.y_std))

    # Columnwise scaling should recover the generating sd (5) approximately
    d.standardize_y(scale='columnwise')
    sim = d.sim_data
    self.assertTrue(np.allclose(sim.orig_y_sd, 5, rtol=0.1))
    self.assertTrue(np.allclose(np.mean(sim.y_std, 0), 0, rtol=0.1))
    self.assertTrue(np.allclose(np.std(sim.y_std, 0), 1, rtol=0.1))
    self.assertTrue(sim.y.shape == sim.y_std.shape)

    # Basis creation is only exercised here; nothing is asserted about the result
    d.create_K_basis(10)
    d.create_D_basis()
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Build three univariate and three multivariate sim+obs models.

    The keyword arguments are forwarded to the ``generate_data`` helpers.
    Results are stored on ``self.univ_model_list`` and
    ``self.multi_model_list``; ``self.hier_idx`` holds the index array
    ``[[0, 0, 0]]``.
    """
    n_hier = 3
    self.hier_idx = np.array([[0, 0, 0]])
    # TODO: np.array([[1, 1, 1], [2, -1, 2]]) fails for multivariate; cant use for univariate now

    univ_data_list, multi_data_list = [], []
    for _ in range(n_hier):
        # Same arguments (including seed) on every pass, so each model gets its own copy of the data
        multi_src = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
            n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ_src = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

        # Univariate: scalar output, so no basis is needed
        univ_data = SepiaData(t_sim=univ_src['t_sim'],
                              y_sim=univ_src['y_sim'],
                              y_obs=univ_src['y_obs'])
        univ_data.transform_xt()
        univ_data.standardize_y()
        univ_data_list.append(univ_data)

        # Multivariate: 5-component K basis plus a constant discrepancy basis
        multi_data = SepiaData(t_sim=multi_src['t_sim'],
                               y_sim=multi_src['y_sim'],
                               y_ind_sim=multi_src['y_ind_sim'],
                               y_obs=multi_src['y_obs'],
                               y_ind_obs=multi_src['y_ind_obs'])
        multi_data.transform_xt()
        multi_data.standardize_y()
        multi_data.create_K_basis(5)
        multi_data.create_D_basis('constant')
        multi_data_list.append(multi_data)

    self.univ_model_list = list(map(SepiaModel, univ_data_list))
    self.multi_model_list = list(map(SepiaModel, multi_data_list))
def test_univariate_sim_only_setup(self):
    """
    Model setup for a univariate, simulation-only data set.

    Checks that setup on untransformed data matches setup after explicit
    transformation, then verifies dimensions and parameter configuration.
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'])
    print('Testing univariate sim-only SepiaModelSetup...', flush=True)
    print(d, flush=True)

    # Set up on an untouched copy first to be sure missing transforms do not break setup
    model_notrans = setup_model(copy.deepcopy(d))

    # Then transform explicitly and set up again
    d.transform_xt()
    d.standardize_y()
    model = setup_model(d)

    # Both routes must end with the same standardized/transformed data
    implicit, explicit = model_notrans.data.sim_data, model.data.sim_data
    for field in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
        self.assertTrue(np.allclose(getattr(implicit, field), getattr(explicit, field)))

    # Dimension bookkeeping
    num = model.num
    self.assertTrue(num.scalar_out)
    self.assertTrue(num.sim_only)
    for attr, expected in (('m', 100), ('n', 0), ('p', 1), ('q', 1), ('pu', 1), ('pv', 0)):
        self.assertTrue(getattr(num, attr) == expected)
    self.assertTrue(np.allclose(num.w, model.data.sim_data.y_std))

    # Parameter configuration: (name, value shape, prior family, MCMC step type)
    expected_params = (
        ('betaU', (2, 1), 'Beta', 'BetaRho'),
        ('lamUz', (1, 1), 'Gamma', 'PropMH'),
        ('lamWOs', (1, 1), 'Gamma', 'PropMH'),
        ('lamWs', (1, 1), 'Gamma', 'PropMH'),
    )
    for name, shape, prior_dist, step_type in expected_params:
        param = getattr(model.params, name)
        self.assertTrue(param.val_shape == shape)
        self.assertTrue(param.prior.dist == prior_dist)
        self.assertTrue(param.mcmc.stepType == step_type)

    # Exactly these parameters are sampled
    sampled_names = {p.name for p in model.params.mcmcList}
    self.assertTrue(sampled_names == {'betaU', 'lamUz', 'lamWOs', 'lamWs'})
def setup_univ_sim_only(m=300, seed=42., n_lik=0, n_mcmc=0, n_pred=0, n_lev=0, n_burn=0, sens=0):
    """
    Run the MATLAB function ``setup_univ_sim_only`` and build the matching SepiaModel.

    All arguments are forwarded unchanged, in order, to the MATLAB function;
    their meaning is defined by that MATLAB code.

    Returns:
        (model, res): the SepiaModel built from ``res['y']`` and ``res['xt']``
        (first column of ``xt`` used as x, second as t), and the raw MATLAB
        result ``res``.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_only(m, seed, n_lik, n_mcmc, n_pred, n_lev, n_burn, sens, nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None], y_sim=y)
    print(data)
    data.standardize_y()
    data.transform_xt()
    model = SepiaModel(data)
    return model, res
def setup_multi_sim_and_obs_sharedtheta(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0, n_pred=0, n_shared=2, clist=None, fix_K=False):
    """
    Run the MATLAB function ``setup_multi_sim_and_obs_sharedtheta`` and build one SepiaModel per shared data set.

    All arguments except ``fix_K`` are forwarded to the MATLAB function
    (``clist`` wrapped in ``matlab.double``); their meaning is defined there.
    ``n_pc`` is also used as the number of K basis components and
    ``n_shared`` as the number of models built. ``clist`` defaults to an
    empty list. ``fix_K`` is accepted but not used by this function.

    Returns:
        (model_list, res): the list of ``n_shared`` SepiaModel objects and the
        raw MATLAB result.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    # Avoid a shared mutable default; an omitted clist still means "empty"
    clist = [] if clist is None else clist

    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_sharedtheta(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc,
                                                      n_pred, n_shared, matlab.double(clist), nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    # Shape notes are carried over from the original code comments; not verified here
    y = np.array(res['y'], dtype=float)  # (m, nt_sim, n_shared)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()  # (nt_sim, n_shared)
    xt = np.array(res['xt'], dtype=float)  # (m, nx, n_shared)
    y_obs = np.array(res['y_obs'], dtype=float)  # (n, nt_sim, n_shared)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()  # (nt_obs, n_shared)
    x_obs = np.array(res['x_obs'], dtype=float)  # (n, 1, n_shared)

    model_list = []
    for i in range(n_shared):
        # The last axis selects the data set; the first xt column is x, the rest are t
        data = SepiaData(x_sim=xt[:, 0, i][:, None], t_sim=xt[:, 1:, i], y_sim=y[:, :, i], y_ind_sim=y_ind[:, i],
                         x_obs=x_obs[:, :, i], y_obs=y_obs[:, :, i], y_ind_obs=y_ind_obs[:, i])
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc)
        model_list.append(SepiaModel(data))
    return model_list, res
def setup_univ_sim_and_obs(m=100, n=50, seed=42., n_lik=0, n_mcmc=0, n_pred=0):
    """
    Run the MATLAB function ``setup_univ_sim_and_obs`` and build the matching SepiaModel.

    All arguments are forwarded unchanged, in order, to the MATLAB function;
    ``n`` is additionally used to reshape ``res['x_obs']`` to ``(n, 1)``.

    Returns:
        (model, res): the SepiaModel built from the simulation and observation
        arrays in ``res``, and the raw MATLAB result.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_and_obs(m, n, seed, n_lik, n_mcmc, n_pred, nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    x_obs = np.array(res['x_obs'], dtype=float).reshape((n, 1))
    # First xt column is x, second is t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None], y_sim=y, x_obs=x_obs, y_obs=y_obs)
    data.standardize_y()
    data.transform_xt()
    print(data)
    model = SepiaModel(data)
    return model, res
def test_emulator_setup_loglik(self):
    """
    The log likelihood of a regular emulator model and of the separable
    (``xt_sim_sep``) model built from the same outputs must agree to 5 places.
    """
    # Regular model
    full_data = SepiaData(x_sim=self.x_sim, y_sim=self.y_sim, y_ind_sim=np.array([0, 1]))
    full_data.create_K_basis(K=np.eye(2))
    full_data.transform_xt(x_notrans=True)
    full_data.standardize_y(scale='columnwise')
    print(full_data)
    full_model = SepiaModel(full_data)
    print('Emulator model LL=%f \n' % compute_log_lik(full_model))

    # Kronecker-separable model
    kron_data = SepiaData(xt_sim_sep=self.x_sim_kron, y_sim=self.y_sim, y_ind_sim=self.y_ind_sim)
    kron_data.create_K_basis(K=np.eye(2))
    kron_data.transform_xt(x_notrans=True)
    kron_data.standardize_y(scale='columnwise')
    print(kron_data)
    kron_model = SepiaModel(kron_data)
    print('Emulator Sep model LL=%f \n' % compute_log_lik(kron_model))

    # Recompute both values for the comparison, regular model first
    ll_full = compute_log_lik(full_model)
    ll_kron = compute_log_lik(kron_model)
    self.assertAlmostEqual(ll_full, ll_kron, places=5)
def test_univariate_sim_and_obs(self):
    """
    Tests univariate sim and obs where we pass in both x and t.

    Covers input rescaling (default and custom range), the no-op and
    columnwise y standardizations for both sim and obs data, and finally
    calls the K and D basis builders.
    """
    m = 700  # number of simulated observations
    p = 3  # dimension of x (sim/obs inputs)
    q = 2  # dimension of t (extra sim inputs)
    n = 5  # number of observed observations
    x_sim = np.random.uniform(-1, 3, (m, p))
    t = np.random.uniform(-10, 10, (m, q))
    x_obs = np.random.uniform(-1.5, 3.5, (n, p))
    y_sim = 5 * np.random.normal(0, 1, m) + 2
    y_obs = 5 * np.random.normal(0, 1, n) + 1

    # The sample mean of m draws with sd 5 has standard error 5/sqrt(m) (~0.19 here).
    # The previous check (rtol=0.1 around 2, i.e. +/-0.2) was roughly a one-sigma band
    # and failed on a large share of unseeded runs; four standard errors is used instead.
    mean_tol = 4 * 5 / np.sqrt(m)

    d = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=t, x_obs=x_obs, y_obs=y_obs)
    print('Testing univariate sim and obs SepiaData...')
    print(d)

    self.assertTrue(d.obs_data is not None)
    self.assertTrue(not d.sim_only)
    self.assertTrue(d.scalar_out)

    # Default transform maps every x and t column onto [0, 1]
    d.transform_xt()
    self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == 1))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 1))

    # Custom target range
    d.transform_xt(-10, 10)
    self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == -10))
    self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == 10))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == -10))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 10))

    # With centering and scaling both off, standardization is the identity
    d.standardize_y(center=False, scale=False)
    self.assertEqual(d.sim_data.orig_y_sd, 1)
    self.assertEqual(d.sim_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))
    self.assertEqual(d.obs_data.orig_y_sd, 1)
    self.assertEqual(d.obs_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.obs_data.y, d.obs_data.y_std))

    # Columnwise standardization should recover the generating mean (2) and sd (5)
    d.standardize_y(scale='columnwise')
    self.assertTrue(np.allclose(d.sim_data.orig_y_sd, 5, rtol=0.1))
    self.assertTrue(np.allclose(d.sim_data.orig_y_mean, 2, rtol=0, atol=mean_tol))
    self.assertTrue(np.allclose(np.mean(d.sim_data.y_std, 0), 0, rtol=0.1))
    self.assertTrue(np.allclose(np.std(d.sim_data.y_std, 0), 1, rtol=0.1))
    self.assertTrue(d.sim_data.y.shape == d.sim_data.y_std.shape)
    # NOTE(review): obs stats are compared against the *simulation* values (2 and 5) although
    # y_obs is generated with mean 1 -- presumably obs data reuses the sim mean/sd; confirm.
    self.assertTrue(np.allclose(d.obs_data.orig_y_sd, 5, rtol=0.1))
    self.assertTrue(np.allclose(d.obs_data.orig_y_mean, 2, rtol=0, atol=mean_tol))
    self.assertTrue(d.obs_data.y.shape == d.obs_data.y_std.shape)

    # Basis creation is only exercised here; nothing is asserted about the result
    d.create_K_basis(10)
    d.create_D_basis()
def setup_multi_sim_and_obs(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0, n_pred=0, fix_K=False):
    """
    Run the MATLAB function ``setup_multi_sim_and_obs`` and build the matching SepiaModel.

    All arguments except ``fix_K`` are forwarded unchanged to the MATLAB
    function; their meaning is defined there. ``n_pc`` is also the number of
    K basis components. When ``fix_K`` is true the K basis is taken from
    ``res['K']`` (transposed) rather than computed in Python.

    Returns:
        (model, res): the SepiaModel (with a 'constant' D basis) and the raw
        MATLAB result.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc, n_pred,
                                          nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    # First xt column is x, the remaining columns are t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    if fix_K:
        # Use the K from matlab - avoids issues with positive/negative component ambiguity
        data.create_K_basis(n_pc, K=np.array(res['K']).T)
    else:
        data.create_K_basis(n_pc)
    data.create_D_basis('constant')
    print(data)
    model = SepiaModel(data)
    return model, res
def create_test_case():
    """
    Generate a small random multivariate sim+obs data set, set up a model,
    and write both to disk.

    Writes ``data/test_case_matlab.mat`` (inputs, bases and standardized
    outputs for use from MATLAB) and ``data/test_case_python_model.pkl``
    (the model and data objects). Paths are relative to the working directory.
    """
    n_obs, n_sim = 2, 5
    p, q = 3, 4
    ell_sim, ell_obs = 80, 20

    def _random_curves(n_curves, grid):
        # Quadratic + linear + constant terms, each with its own random coefficient per curve.
        # The three draws are made in this order to keep the RNG stream unchanged.
        quad_coef = np.random.normal(0, 1, (n_curves, 1))
        lin_coef = np.random.normal(0, 1, (n_curves, 1))
        const_coef = np.random.normal(0, 1, (n_curves, 1))
        return (10 * quad_coef * (grid[None, :] - 50)**2 / 75.
                + 20 * lin_coef * grid[None, :]
                + 20 * const_coef)

    # Simulation side (t is drawn first, then the curve coefficients)
    t = np.random.uniform(0, 1, (n_sim, q))
    x = 0.5 * np.ones((n_sim, p))
    y_ind = np.linspace(0, 100, ell_sim)
    y = _random_curves(n_sim, y_ind)

    # Observation side, on a shorter index grid
    x_obs = 0.5 * np.ones((n_obs, p))
    y_obs_ind = np.linspace(10, 85, ell_obs)
    y_obs = _random_curves(n_obs, y_obs_ind)

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_obs_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=3)
    data.create_D_basis('constant')

    # Matfile for testing in matlab
    sim, obs = data.sim_data, data.obs_data
    mat_contents = {
        't': t,
        'y': y,
        'y_obs': y_obs,
        'D': obs.D,
        'Kobs': obs.K,
        'Ksim': sim.K,
        'y_obs_std': obs.y_std,
        'y_sim_std': sim.y_std,
        'y_sd': sim.orig_y_sd,
    }
    scipy.io.savemat('data/test_case_matlab.mat', mat_contents)

    g = setup_model(data)

    # Pickle of the python-side results
    with open('data/test_case_python_model.pkl', 'wb') as f:
        pickle.dump({'model': g, 'data': data}, f)
def test_multivariate_sim_only_x_only(self):
    """
    Multivariate, simulation-only SepiaData built from x inputs alone (no t).

    Outputs are a log trend plus random combinations of three known basis
    curves; the test checks rescaling, standardization and the K basis shape.
    """
    m = 700  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)
    ell = 1000  # dimension of y output
    pu = 3  # number of PCs
    y_ind = np.linspace(0, 100, ell)

    # Three basis curves over the output index, one per row
    basis_rows = [0.5 * (np.sin(y_ind) + 1),
                  np.square(-y_ind + 50) / 2500,
                  y_ind / 100]
    K_true = np.vstack(basis_rows)

    # y is drawn before x so the global RNG stream is consumed in the same order
    trend = np.log(1 + y_ind)[:, None]
    weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, m))
    y = np.transpose(trend + np.dot(K_true.T, weights))
    x = 0.5 * np.random.uniform(-1, 3, (m, p))

    d = SepiaData(x_sim=x, y_sim=y, t_sim=None, y_ind_sim=y_ind)
    print('Testing multivariate sim-only SepiaData...')
    print(d)

    self.assertTrue(d.obs_data is None)
    self.assertTrue(d.sim_only)
    self.assertTrue(not d.scalar_out)

    # Default range first, then a custom one; each x column must hit both bounds exactly
    for range_args, (lower, upper) in (((), (0, 1)), ((-10, 10), (-10, 10))):
        d.transform_xt(*range_args)
        self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == lower))
        self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == upper))

    # With centering and scaling both off, standardization is the identity
    d.standardize_y(center=False, scale=False)
    sim = d.sim_data
    self.assertEqual(sim.orig_y_sd, 1)
    self.assertEqual(sim.orig_y_mean, 0)
    self.assertTrue(np.allclose(sim.y, sim.y_std))

    # Columnwise standardization: the recovered mean should follow the log trend
    d.standardize_y(scale='columnwise')
    sim = d.sim_data
    self.assertTrue(np.allclose(sim.orig_y_mean, np.log(1 + y_ind), rtol=0.1, atol=0.5))
    self.assertTrue(np.allclose(np.std(sim.y_std, 0), 1, rtol=0.1))
    self.assertTrue(np.allclose(np.mean(sim.y_std, 0), 0, rtol=0.1))
    self.assertTrue(sim.y.shape == sim.y_std.shape)

    d.create_K_basis(3)
    self.assertTrue(d.sim_data.K.shape == (pu, ell))
    d.create_D_basis()
    print(d)
def setup_multi_sim_and_obs_noD(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0):
    """
    Run the MATLAB function ``setup_multi_sim_and_obs_noD`` and build the matching SepiaModel.

    All arguments are forwarded unchanged, in order, to the MATLAB function;
    their meaning is defined there. ``n_pc`` is also the number of K basis
    components. No D basis is created.

    Returns:
        (model, res): the SepiaModel and the raw MATLAB result.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_noD(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc,
                                              nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    # First xt column is x, the remaining columns are t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    print(data)
    model = SepiaModel(data)
    return model, res
def setUp(self, m=20, n=1, nt_sim=30, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Build the four models used by the tests and store them on ``self``:
    univariate sim-only, univariate sim+obs, multivariate sim-only, and
    multivariate sim-only with an extra categorical t column.

    The keyword arguments are forwarded to the ``generate_data`` helpers.
    """
    multi_src = generate_data.generate_multi_sim_and_obs(
        m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
        n_basis=n_basis, sig_n=sig_n, seed=seed)
    univ_src = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

    # Univariate, simulation only
    univ_sim = SepiaData(t_sim=univ_src['t_sim'], y_sim=univ_src['y_sim'])
    univ_sim.transform_xt()
    univ_sim.standardize_y()
    self.univ_sim_only_model = SepiaModel(univ_sim)

    # Univariate, simulation plus observations
    univ_full = SepiaData(t_sim=univ_src['t_sim'], y_sim=univ_src['y_sim'], y_obs=univ_src['y_obs'])
    univ_full.transform_xt()
    univ_full.standardize_y()
    self.univ_sim_and_obs_model = SepiaModel(univ_full)

    # Multivariate, simulation only, 5-component K basis
    multi_sim = SepiaData(t_sim=multi_src['t_sim'],
                          y_sim=multi_src['y_sim'],
                          y_ind_sim=multi_src['y_ind_sim'])
    multi_sim.transform_xt()
    multi_sim.standardize_y()
    multi_sim.create_K_basis(5)
    self.multi_sim_only_model = SepiaModel(multi_sim)

    # Same, with one more t column of random integers in 1..4 flagged through t_cat_ind
    category_col = np.random.choice(range(1, 5), (m, 1), replace=True)
    t_with_cat = np.concatenate([multi_src['t_sim'], category_col], axis=1)
    multi_cat = SepiaData(t_sim=t_with_cat,
                          y_sim=multi_src['y_sim'],
                          y_ind_sim=multi_src['y_ind_sim'],
                          t_cat_ind=[0, 0, 0, 4])
    multi_cat.transform_xt()
    multi_cat.standardize_y()
    multi_cat.create_K_basis(5)
    self.multi_sim_only_catind_model = SepiaModel(multi_cat)
def setup_multi_sim_only(m=300, nt=20, nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0, n_pred=0, fix_K=False, sens=0):
    """
    Run the MATLAB function ``setup_multi_sim_only`` and build the matching SepiaModel.

    All arguments except ``fix_K`` are forwarded to the MATLAB function;
    their meaning is defined there. ``n_pc`` is also the number of K basis
    components. When ``fix_K`` is true the computed simulation K basis is
    overwritten with ``res['K']`` (transposed).

    Returns:
        (model, res): the SepiaModel and the raw MATLAB result.

    Raises:
        Whatever the MATLAB engine call raised. The error and a hint are
        printed first, then the exception is re-raised instead of letting the
        function fail later on an undefined ``res``.
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_only(m, nt, nx, n_pc, seed, n_lik, n_mcmc, n_pred, sens, nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        raise
    finally:
        # Shut the engine down on failure too, so no MATLAB process is left behind
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    # First xt column is x, the remaining columns are t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    if fix_K:
        # Replace the Python-computed basis with the one MATLAB used
        data.sim_data.K = np.array(res['K']).T
    print(data)
    model = SepiaModel(data)
    return model, res
def test_univariate_sim_only_lik(self):
    """
    Smoke test: the log likelihood of a univariate sim-only model can be
    evaluated in full and for every element of every sampled parameter.
    """
    source = self.univ_data_dict
    d = SepiaData(t_sim=source['t_sim'], y_sim=source['y_sim'])
    print('Testing univariate sim-only SepiaLogLik...', flush=True)
    print(d, flush=True)

    d.transform_xt()
    d.standardize_y()
    model = setup_model(d)

    # Full evaluation first, then one call per changed parameter element
    model.logLik()
    for param in model.params.mcmcList:
        n_elements = int(np.prod(param.val_shape))
        for cindex in range(n_elements):
            model.logLik(cvar=param.name, cindex=cindex)
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Prepare five transformed and standardized SepiaData objects on ``self``:
    univariate sim-only, univariate sim+obs, multivariate sim-only,
    multivariate sim+obs without D, and multivariate sim+obs with a linear D.

    The keyword arguments are forwarded to the ``generate_data`` helpers.
    """
    multi_src = generate_data.generate_multi_sim_and_obs(
        m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
        n_basis=n_basis, sig_n=sig_n, seed=seed)
    univ_src = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

    def _prepared(fields, n_pc=None, discrep=None):
        # Build a SepiaData, apply the standard transforms, then the optional K and D bases
        d = SepiaData(**fields)
        d.transform_xt()
        d.standardize_y()
        if n_pc is not None:
            d.create_K_basis(n_pc)
        if discrep is not None:
            d.create_D_basis(discrep)
        return d

    # Constructor arguments for each flavour of data set
    univ_sim = {'t_sim': univ_src['t_sim'], 'y_sim': univ_src['y_sim']}
    univ_full = dict(univ_sim, y_obs=univ_src['y_obs'])
    multi_sim = {key: multi_src[key] for key in ('t_sim', 'y_sim', 'y_ind_sim')}
    multi_full = dict(multi_sim, y_obs=multi_src['y_obs'], y_ind_obs=multi_src['y_ind_obs'])

    self.univ_sim_only_data = _prepared(univ_sim)
    self.univ_sim_and_obs_data = _prepared(univ_full)
    self.multi_sim_only_data = _prepared(multi_sim, n_pc=5)
    self.multi_sim_and_obs_noD_data = _prepared(multi_full, n_pc=5)
    self.multi_sim_and_obs_data = _prepared(multi_full, n_pc=5, discrep='linear')
def test_multivariate_sim_and_obs_noD_lik(self):
    """
    Tests log lik for multivariate sim and obs model with no discrepancy.

    Smoke test only: the likelihood is evaluated in full and then once per
    element of every sampled parameter; no values are asserted.
    """
    d = SepiaData(t_sim=self.multi_data_dict['t_sim'], y_sim=self.multi_data_dict['y_sim'],
                  y_ind_sim=self.multi_data_dict['y_ind_sim'], y_obs=self.multi_data_dict['y_obs'],
                  y_ind_obs=self.multi_data_dict['y_ind_obs'])

    # The message used to say "sim-only" (copy-paste slip); this test covers sim and obs without D
    print('Testing multivariate sim and obs (no discrepancy) SepiaLogLik...', flush=True)
    print(d, flush=True)

    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(5)  # K basis only; no D basis is created for this model
    model = setup_model(d)

    model.logLik()

    for param in model.params.mcmcList:
        for cindex in range(int(np.prod(param.val_shape))):
            model.logLik(cvar=param.name, cindex=cindex)
def test_predict_univ_sim_only(self):
    """
    Compare Python emulator predictions with stored MATLAB results for the
    univariate, simulation-only case.

    The MATLAB reference is read from ``data/univ_sim_only_mcmc_test.mat``
    next to this file; if it is missing, the MATLAB script
    ``univ_sim_only_mcmc_test`` is run through the MATLAB engine to create it.
    Any engine failure is re-raised after printing a hint.
    """
    np.random.seed(42)

    show_figs = True

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/univ_sim_only_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from univ_sim_only_mcmc_test.mat \n')
        matfile = scipy.io.loadmat(mat_fn)
    else:
        print('Generating matfile univ_sim_only_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        eng = None
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.univ_sim_only_mcmc_test(nargout=0)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Without the matfile nothing below can run, so fail here with the real error
            raise
        finally:
            if eng is not None:
                eng.quit()
        # Load by path variable (the code used to pass the literal string 'mat_fn')
        matfile = scipy.io.loadmat(mat_fn)

    y = matfile['y']
    x = matfile['x']
    t = matfile['t']

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y)
    data.standardize_y()
    data.transform_xt()
    print(data)

    model = setup_model(data)
    # loadmat returns scalars as 2-D arrays; .item() extracts the single value explicitly
    nsamp = int(np.asarray(matfile['nsamp']).item())
    nburn = int(np.asarray(matfile['nburn']).item())

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()
    print('Python mcmc time %0.3g s' % (t_end - t_start))
    print('Matlab mcmc time %0.3g s' % float(np.asarray(matfile['mcmc_time']).item()))

    # Creates dict with each sampled variable name as key, array of samples (nsamp, ...) as value.
    # These two results are not used below; the calls are kept as smoke checks.
    samples = model.get_samples(nburn)
    log_post = np.array(model.params.lp.mcmc.draws)

    # Single-point prediction from the first five draws, with mean and sigma returned
    np.random.seed(42)
    psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
    pred = wPred([0.5, 0.5], psamps, model.num, model.data, returnMuSigma=True)
    print('Samples are:')
    print(pred.w.squeeze())
    print('Matlab Samples are:')
    print(matfile['pred_w'].squeeze())

    print('Mu are:')
    print(pred.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred_Myhat'].squeeze())

    print('Sigma are:')
    print(pred.sigma.squeeze())
    print('Matlab Sigma are:')
    print(matfile['pred_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred_Syhat'].squeeze(), pred.sigma.squeeze()))

    # Prediction with multiple realizations
    np.random.seed(42)
    sampleset = np.arange(100, 1001, 100) - 1
    samples = model.get_samples(sampleset=sampleset)
    nq = 10
    t = np.linspace(0, 1, nq)
    xpred = np.column_stack((np.ones((nq, 1)) * 0.5, t))
    pred_plot = wPred(xpred, samples, model.num, model.data)

    print('pred_plot_w are:')
    print(pred_plot.w.squeeze()[0, :])
    print('Matlab pred_plot_w are:')
    print(matfile['pred_plot_w'].squeeze()[0, :])

    print('Checking predicted realizations for plotting...')
    # Apparently numerics come into play here, need to turn down the rtol on 'close'
    self.assertTrue(np.allclose(matfile['pred_plot_w'].squeeze(), pred_plot.w.squeeze(), rtol=1e-3))

    print('Done.')

    if show_figs:
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(data.sim_data.t_trans, data.sim_data.y_std)
        plt.plot(np.tile(t, (len(sampleset), 1)), np.squeeze(pred_plot.w), '.')
        plt.show()
def test_predict_uv_from_multi_obs(self):
    """
    Compare Python u/v predictions with stored MATLAB results for the
    multivariate sim+obs case.

    The MATLAB reference is read from ``data/multi_sim_and_obs_mcmc_test.mat``
    next to this file; if it is missing, the MATLAB script
    ``multi_sim_and_obs_mcmc_test`` is run through the MATLAB engine to create
    it. Any engine failure is re-raised after printing a hint.
    """
    n_pc = 2
    seed = 42.
    lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
    nsamp = 100
    nburn = 0

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/multi_sim_and_obs_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from multi_sim_and_obs_mcmc_test.mat \n')
        matfile = scipy.io.loadmat(mat_fn)
    else:
        print('Generating matfile multi_sim_and_obs_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        eng = None
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.multi_sim_and_obs_mcmc_test(nsamp, nburn, seed, lamWOs_init, n_pc, 0, nargout=0)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Without the matfile nothing below can run, so fail here with the real error
            raise
        finally:
            if eng is not None:
                eng.quit()
        matfile = scipy.io.loadmat(mat_fn)

    # The stored run lengths take precedence over the defaults above.
    # loadmat returns scalars as 2-D arrays; .item() extracts the single value explicitly.
    nburn = int(np.asarray(matfile['nburn']).item())
    nsamp = int(np.asarray(matfile['nsamp']).item())

    y_sim = matfile['y'].T
    y_ind_sim = matfile['y_ind'].squeeze()
    xt_sim = matfile['x']
    y_obs = matfile['y_obs']
    y_ind_obs = matfile['y_ind_obs'].squeeze()
    x_obs = matfile['x_obs']
    # First x column is x, second is t
    data = SepiaData(x_sim=xt_sim[:, 0][:, None], t_sim=xt_sim[:, 1][:, None], y_sim=y_sim, y_ind_sim=y_ind_sim,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)

    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=n_pc)
    # Use the discrepancy basis stored by MATLAB
    data.create_D_basis(D=matfile['Dobs'].T)
    print(data)

    np.random.seed(int(seed))
    model = setup_model(data)
    if lamWOs_init > 0:
        model.params.lamWOs.val = np.array([[lamWOs_init]])

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()
    print('Python mcmc time %0.3g s' % (t_end - t_start))
    print('Matlab mcmc time %0.3g s' % float(np.asarray(matfile['mcmc_time']).item()))

    np.random.seed(int(seed))
    psamps = model.get_samples(0, sampleset=[0, 4], flat=True)
    #pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True, useAltW=True)
    pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True)

    print('Samples of u are:')
    print(pred.u.squeeze())
    print('Matlab Samples of u are:')
    print(matfile['pred2_u'].squeeze())

    print('Samples of v are:')
    print(pred.v.squeeze())
    print('Matlab Samples of v are:')
    print(matfile['pred2_v'].squeeze())

    print('Mu are:')
    print(pred.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred2_Myhat'])

    # NOTE(review): the (14, 7) reshape is hard-coded -- presumably tied to this data set's sizes; confirm
    print('Sigma are:')
    print(pred.sigma.squeeze().reshape(14, 7).T)
    print('Matlab Sigma are:')
    print(matfile['pred2_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred2_u'].squeeze(), pred.u.squeeze()))
    self.assertTrue(np.allclose(matfile['pred2_v'].squeeze(), pred.v.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred2_Myhat'].squeeze(), pred.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred2_Syhat'].squeeze(), pred.sigma.squeeze().reshape(14, 7).T))

    print('Done.')
# Build the SepiaData object from the synthetic simulation and observation arrays in data_dict
data = SepiaData(t_sim=data_dict['t_sim'], y_sim=data_dict['y_sim'], y_ind_sim=data_dict['y_ind_sim'], y_obs=data_dict['y_obs'], y_ind_obs=data_dict['y_ind_obs'])
print(data)

# Plot every simulated curve against its index, with the observations overlaid as black dots
plt.plot(data.sim_data.y_ind, data.sim_data.y.T)
plt.plot(data.obs_data.y_ind, data.obs_data.y.T, 'k.', linewidth=3)
plt.title('Synthetic data (obs. in black)')
plt.xlabel('y index')
plt.ylabel('y')
plt.show()

#%%
# Rescale inputs, standardize outputs columnwise, then build the K basis (5 components)
# and a linear discrepancy basis
data.transform_xt()
data.standardize_y(scale='columnwise')
data.create_K_basis(5)
data.create_D_basis(type='linear')
print(data)

#%%
# Model object built from the prepared data
model = SepiaModel(data)

#%%
# File name for the cached results; the imports below are presumably used by the
# caching code that follows this cell -- not visible here
cachefile_name='multivariate_example_with_prediction.pkl'
import os.path
import pickle
# simulated data with open(datadir+'desNative80x4Cg.txt','r') as f: sim_data = np.loadtxt(f) x_sim = sim_data[:,0:2] # x = {R, rho_ball} t_sim = sim_data[:,2:4] # t = {C, g} with open(datadir+'simHeights101x1','r') as f: h_sim = np.loadtxt(f) with open(datadir+'sims101x80Cg.txt','r') as f: y_sim = np.loadtxt(f).T # create sepia data object data = SepiaData(x_sim = x_sim, t_sim = t_sim, y_ind_sim = h_sim, y_sim = y_sim,\ x_obs = x_obs, y_obs = y_obs, y_ind_obs = h_obs) data.transform_xt() data.standardize_y() data.create_K_basis(3) data.create_D_basis('linear') print(data) model = setup_model(data) #%% Ragged data and model setup y_obs_ragged = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\ np.array(field_data[6:9,4]),np.array(field_data[9:,4])] h_obs_ragged = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\ np.array(field_data[6:9,3]),np.array(field_data[9:,3])]# observed heights #y_obs = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\ # np.array(field_data[[7,9,11],4]),np.array(field_data[12:,4])] #h_obs = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\ # np.array(field_data[[7,9,11],3]),np.array(field_data[12:,3])]# observed heights
y_obs=data_dict['y_obs'], y_ind_obs=data_dict['y_ind_obs']) print(data) plt.plot(data.sim_data.y_ind, data.sim_data.y.T) plt.plot(data.obs_data.y_ind, data.obs_data.y.T, 'k.', linewidth=3) plt.title('Synthetic data (obs. in black)') plt.xlabel('y index') plt.ylabel('y') plt.show() #%% data.transform_xt() data.standardize_y('columnwise') data.create_K_basis(5) data.create_D_basis(type='linear') print(data) #%% model = setup_model(data) #%% cachefile_name = 'multivariate_example_with_prediction.pkl' import os.path import pickle
def test_multivariate_sim_and_obs_lamVzGroups_setup(self):
    """
    Model setup for multivariate sim+obs data with a custom three-row D
    basis and ``lamVzGroup=[0, 1, 1]``.

    Verifies the dimension bookkeeping and the shape, prior family and MCMC
    step type of every parameter, plus the set of sampled parameters.
    """
    src = self.data_dict
    d = SepiaData(t_sim=src['t_sim'], y_sim=src['y_sim'], y_ind_sim=src['y_ind_sim'],
                  y_obs=src['y_obs'], y_ind_obs=src['y_ind_obs'])
    print('Testing multivariate sim and obs SepiaModelSetup with discrep...', flush=True)
    print(d, flush=True)

    # Explicit transformation and bases
    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(n_pc=5)
    # Discrepancy basis rows: constant, linear and quadratic in the observation index
    obs_index = d.obs_data.y_ind
    custom_D = np.vstack([np.ones(d.obs_data.y.shape[1]), obs_index, obs_index**2])
    d.create_D_basis(D_obs=custom_D)

    lamVzGroup = [0, 1, 1]
    model = setup_model(d, lamVzGroup=lamVzGroup)

    # Dimension bookkeeping
    num = model.num
    self.assertTrue(not num.scalar_out)
    self.assertTrue(not num.sim_only)
    for attr, expected in (('m', 100), ('n', 1), ('p', 1), ('q', 3), ('pu', 5), ('pv', 3)):
        self.assertTrue(getattr(num, attr) == expected)

    # Parameter configuration: (name, value shape, prior family, MCMC step type).
    # betaV and lamVz have two columns, matching the two distinct values in lamVzGroup.
    expected_params = (
        ('betaU', (num.q + num.p, num.pu), 'Beta', 'BetaRho'),
        ('betaV', (1, 2), 'Beta', 'BetaRho'),
        ('lamUz', (1, num.pu), 'Gamma', 'PropMH'),
        ('lamVz', (1, 2), 'Gamma', 'PropMH'),
        ('lamWOs', (1, 1), 'Gamma', 'PropMH'),
        ('lamWs', (1, num.pu), 'Gamma', 'PropMH'),
        ('lamOs', (1, 1), 'Gamma', 'PropMH'),
        ('theta', (1, num.q), 'Normal', 'Uniform'),
    )
    for name, shape, prior_dist, step_type in expected_params:
        param = getattr(model.params, name)
        self.assertTrue(param.val_shape == shape)
        self.assertTrue(param.prior.dist == prior_dist)
        self.assertTrue(param.mcmc.stepType == step_type)

    # theta keeps its original range endpoints of 0 and 1
    theta = model.params.theta
    self.assertTrue(np.allclose(theta.orig_range[0], 0))
    self.assertTrue(np.allclose(theta.orig_range[1], 1))

    # Exactly these parameters are sampled
    sampled_names = {p.name for p in model.params.mcmcList}
    self.assertTrue(sampled_names == {'betaU', 'betaV', 'lamUz', 'lamVz',
                                      'lamWOs', 'lamWs', 'lamOs', 'theta'})
def test_predict_multi_sim_only(self):
    """
    Compares wPred output for a multivariate sim-only model with MATLAB.

    Reference values are read from ``data/multi_sim_only_mcmc_test.mat``
    next to this file.  If the file is missing, the MATLAB engine is asked
    to generate it; if that fails too, the test is skipped because there is
    nothing to compare against.  The Python model is built from the data
    stored in the .mat file, run for ``nburn + nsamp`` MCMC iterations, and
    predictions (with and without ``addResidVar``) are checked against the
    stored MATLAB realizations, means and sigmas with ``np.allclose``.
    """
    n_pc = 2
    seed = 42
    lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
    # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
    list_to_sample = [1, 2, 3, 4]
    nsamp = 100
    nburn = 10

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
        matfile = scipy.io.loadmat(mat_fn)
    else:
        print('Generating matfile multi_sim_only_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.multi_sim_only_mcmc_test(nsamp, nburn, list_to_sample, seed,
                                         lamWOs_init, n_pc, nargout=0)
            eng.quit()
            matfile = scipy.io.loadmat(mat_fn)
        except Exception as e:
            # Deliberately broad: a missing matlab module, an engine start
            # failure and a missing output file all mean the same thing
            # here -- no reference data.  Skip explicitly instead of
            # falling through to an unbound `matfile` below.
            print(e)
            print('make sure matlab.engine installed')
            self.skipTest('MATLAB reference data unavailable: %s' % e)

    y = matfile['y'].T
    y_ind = matfile['y_ind'].T
    x = matfile['x']

    data = SepiaData(x_sim=x[:, 0][:, None], t_sim=x[:, 1][:, None],
                     y_sim=y, y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=n_pc)
    print(data)

    np.random.seed(int(seed))
    model = setup_model(data)
    if lamWOs_init > 0:
        model.params.lamWOs.val = np.array([[lamWOs_init]])
    # Restrict (and order) the sampled parameters to match the MATLAB run
    model.params.mcmcList = [
        model.params.mcmcList[i - 1] for i in list_to_sample
    ]

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()
    print('Python mcmc time %0.3g s' % (t_end - t_start))
    print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])

    np.random.seed(seed)
    psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)

    def report_and_check(pred, key_prefix):
        # Print Python vs. MATLAB values, then assert they agree.
        # key_prefix selects the reference entries in the .mat file.
        w = pred.w.squeeze()
        mu = pred.mu.squeeze()
        print('Samples are:')
        print(w)
        print('Matlab Samples are:')
        print(matfile[key_prefix + 'w'].squeeze())
        print('Mu are:')
        print(mu)
        print('Matlab Mu are:')
        print(matfile[key_prefix + 'Myhat'])
        print('Sigma are:')
        # Laid out as (2, 10) to line up with the stored MATLAB array
        sigma = pred.sigma.squeeze().reshape(10, 2).T
        print(sigma)
        print('Matlab Sigma are:')
        print(matfile[key_prefix + 'Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(np.allclose(matfile[key_prefix + 'w'].squeeze(), w))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile[key_prefix + 'Myhat'].squeeze(), mu))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile[key_prefix + 'Syhat'].squeeze(), sigma))

    pred = wPred([0.5, 0.5], psamps, model.num, model.data,
                 returnMuSigma=True)
    report_and_check(pred, 'pred_')

    # Second prediction is drawn only after the first has been checked, so
    # the sequence of random draws is the same as in a straight-line script.
    pred_arv = wPred([0.5, 0.5], psamps, model.num, model.data,
                     addResidVar=True, returnMuSigma=True)
    print('Add Residual Variance test')
    report_and_check(pred_arv, 'pred_arv_')

    print('Done.')
def test_multivariate_sim_and_obs_ragged(self):
    """
    Tests multivariate sim and obs where we pass in x and t but obs is ragged.

    Each of the ``n`` observations gets its own randomly chosen output
    length, so ``y_obs`` and ``y_ind_obs`` are passed to SepiaData as lists
    of arrays.  The test checks the data flags, the x/t transform to
    [0, 1], y standardization on the simulation side, and the K basis
    shape, then builds a D basis.
    """
    m = 700  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)
    ell_sim = 1000  # dimension of y output sim
    pu = 3  # number of PCs
    q = 2  # dimension of t (extra sim inputs)
    n = 5  # number of observed observations

    ell_obs = np.random.randint(100, 600, n)  # one output length per obs
    basis_weights = 2 * np.array([1, 0.5, 0.2])[:, None]

    y_ind_sim = np.linspace(0, 100, ell_sim)
    K_true_sim = np.vstack([
        0.5 * (np.sin(y_ind_sim) + 1),
        np.square(-y_ind_sim + 50) / 2500,
        y_ind_sim / 100,
    ])
    y_sim = np.transpose(
        np.log(1 + y_ind_sim)[:, None]
        + np.dot(K_true_sim.T,
                 basis_weights * np.random.normal(0, 1, (pu, m))))
    x_sim = 0.5 * np.random.uniform(-1, 3, (m, p))
    t = np.random.uniform(-10, 10, (m, q))

    # Jittered index grid per observation, clipped at zero so that
    # log(1 + y_ind) below stays defined.
    y_ind_obs = [
        np.linspace(0, 100, ell) + np.random.uniform(-3, 3, ell)
        for ell in ell_obs
    ]
    for yi in y_ind_obs:
        yi[yi < 0] = 0
    K_true_obs = [
        np.vstack(
            [0.5 * (np.sin(yi) + 1), np.square(-yi + 50) / 2500, yi / 100])
        for yi in y_ind_obs
    ]
    y_obs = [
        10 + np.squeeze(
            np.log(1 + yi)[:, None]
            + np.dot(K_obs.T,
                     basis_weights * np.random.normal(0, 1, (pu, 1))))
        for yi, K_obs in zip(y_ind_obs, K_true_obs)
    ]
    x_obs = 0.5 * np.random.uniform(-1, 3, (n, p))

    d = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=t, y_ind_sim=y_ind_sim,
                  x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)

    print('Testing multivariate sim and obs SepiaData...')
    print(d)

    self.assertIsNotNone(d.obs_data)
    self.assertFalse(d.sim_only)
    self.assertFalse(d.scalar_out)

    d.transform_xt()
    self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == 1))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 1))

    d.standardize_y(center=False, scale=False)
    self.assertEqual(d.sim_data.orig_y_sd, 1)
    self.assertEqual(d.sim_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))
    # NOTE(review): the obs-side checks below are disabled in this ragged
    # test; presumably they do not apply to list-valued obs data -- confirm.
    #self.assertEqual(d.obs_data.orig_y_sd, 1)
    #self.assertEqual(d.obs_data.orig_y_mean, 0)
    #self.assertTrue(np.allclose(d.obs_data.y, d.obs_data.y_std))

    d.standardize_y(scale='columnwise')
    self.assertTrue(
        np.allclose(d.sim_data.orig_y_mean, np.log(1 + y_ind_sim),
                    rtol=0.1, atol=0.5))
    self.assertTrue(np.allclose(np.mean(d.sim_data.y_std, 0), 0, rtol=0.1))
    self.assertTrue(np.allclose(np.std(d.sim_data.y_std, 0), 1, rtol=0.1))
    self.assertEqual(d.sim_data.y.shape, d.sim_data.y_std.shape)

    d.create_K_basis(pu)
    self.assertEqual(d.sim_data.K.shape, (pu, ell_sim))
    d.create_D_basis()
    print(d)
def test_multivariate_sim_and_obs_no_x(self):
    """
    Tests multivariate sim and obs where we pass in t but not x (x is a dummy variable).

    Checks that the dummy x transforms to a constant 0.5 for both sim and
    obs data, that t is scaled to [0, 1], that y standardization behaves as
    expected on both sides, and that the K and D bases have the expected
    shapes.
    """
    m = 700  # number of simulated observations
    ell_sim = 1000  # dimension of y output sim
    ell_obs = 258  # dimension of y output obs
    pu = 3  # number of PCs
    q = 2  # dimension of t (extra sim inputs)
    n = 5  # number of observed observations

    basis_weights = 2 * np.array([1, 0.5, 0.2])[:, None]

    y_ind_sim = np.linspace(0, 100, ell_sim)
    K_true_sim = np.vstack([
        0.5 * (np.sin(y_ind_sim) + 1),
        np.square(-y_ind_sim + 50) / 2500,
        y_ind_sim / 100,
    ])
    y_sim = np.transpose(
        np.log(1 + y_ind_sim)[:, None]
        + np.dot(K_true_sim.T,
                 basis_weights * np.random.normal(0, 1, (pu, m))))
    t = np.random.uniform(-10, 10, (m, q))

    # Jittered obs index grid, clipped at zero so log(1 + y_ind) is defined
    y_ind_obs = np.linspace(0, 100, ell_obs) + np.random.uniform(
        -3, 3, ell_obs)
    y_ind_obs[y_ind_obs < 0] = 0
    K_true_obs = np.vstack([
        0.5 * (np.sin(y_ind_obs) + 1),
        np.square(-y_ind_obs + 50) / 2500,
        y_ind_obs / 100,
    ])
    y_obs = 10 + np.transpose(
        np.log(1 + y_ind_obs)[:, None]
        + np.dot(K_true_obs.T,
                 basis_weights * np.random.normal(0, 1, (pu, n))))

    d = SepiaData(y_sim=y_sim, t_sim=t, y_ind_sim=y_ind_sim,
                  y_obs=y_obs, y_ind_obs=y_ind_obs)

    print('Testing multivariate sim and obs SepiaData...')
    print(d)

    self.assertIsNotNone(d.obs_data)
    self.assertFalse(d.sim_only)
    self.assertFalse(d.scalar_out)

    d.transform_xt()
    self.assertTrue(np.all(d.sim_data.x_trans == 0.5))
    self.assertTrue(np.all(d.obs_data.x_trans == 0.5))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 1))

    d.standardize_y(center=False, scale=False)
    self.assertEqual(d.sim_data.orig_y_sd, 1)
    self.assertEqual(d.sim_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))
    self.assertEqual(d.obs_data.orig_y_sd, 1)
    self.assertEqual(d.obs_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.obs_data.y, d.obs_data.y_std))

    d.standardize_y(scale='columnwise')
    self.assertTrue(
        np.allclose(d.sim_data.orig_y_mean, np.log(1 + y_ind_sim),
                    rtol=0.1, atol=0.5))
    self.assertTrue(np.allclose(np.mean(d.sim_data.y_std, 0), 0, rtol=0.1))
    self.assertTrue(np.allclose(np.std(d.sim_data.y_std, 0), 1, rtol=0.1))
    self.assertEqual(d.sim_data.y.shape, d.sim_data.y_std.shape)
    self.assertTrue(
        np.allclose(d.obs_data.orig_y_mean, np.log(1 + y_ind_obs),
                    rtol=0.1, atol=0.5))
    self.assertEqual(d.obs_data.y.shape, d.obs_data.y_std.shape)

    d.create_K_basis(pu)
    self.assertEqual(d.sim_data.K.shape, (pu, ell_sim))
    self.assertEqual(d.obs_data.K.shape, (pu, ell_obs))
    d.create_D_basis()
    self.assertEqual(d.obs_data.D.shape, (1, ell_obs))
    print(d)
def test_full_setup_LL_pred(self):
    """
    Checks that a calibration model gives the same log likelihood and the
    same 'Sep'-mode predictions whether the simulation design is passed as
    regular x/t arrays or as a separable list (``xt_sim_sep``), then prints
    prediction timings for both.

    Reads ``self.x_sim``, ``self.x_sim_kron``, ``self.y_sim`` and
    ``self.y_ind_sim``, which are assumed to come from the test fixture
    (not visible from this method).
    """

    def build_model(data):
        # Identity K and D bases, no x/t transform, no y rescaling.
        data.create_K_basis(K=np.eye(2))
        data.create_D_basis(D_sim=np.eye(2), D_obs=np.eye(2))
        data.transform_xt(x_notrans=True, t_notrans=True)
        data.standardize_y(y_mean=0, y_sd=1)
        print(data)
        return SepiaModel(data)

    def sample_and_predict(model):
        # Seeded 10-step MCMC, then predict at (0.5, 0.5) from the last draw.
        np.random.seed(42)
        started = time()
        model.do_mcmc(10)
        print('sampling time %f' % (time() - started))
        last_draw = model.get_samples(
            sampleset=[model.get_last_sample_ind()])
        prediction = SepiaFullPrediction(
            mode='Sep', model=model, samples=last_draw, storeMuSigma=True,
            x_pred=np.array([0.5, 0.5]).reshape((1, -1)))
        print(prediction.get_ysim())
        mu, sigma = prediction.get_mu_sigma()
        print(mu)
        print(sigma)
        return mu, sigma

    def timed_prediction(report_fmt, mode, model, samples, x_pred):
        # Build a full prediction and print how long construction took.
        started = time()
        SepiaFullPrediction(mode=mode, model=model, samples=samples,
                            storeMuSigma=True, x_pred=x_pred)
        print(report_fmt % (time() - started))

    # Set up and check calibration model
    x_obs = np.ones((3, 2)) * np.array([0.5, 0.75, 0.25]).reshape((-1, 1))
    y_obs = np.block([[-0.1, 0.1], [-0.2, 0.3], [0.1, 0]])

    # augment to also test more than scalar dimensions in x and t
    const_col = 0.5 * np.ones((self.x_sim.shape[0], 1))
    x_sim_cal = np.hstack((const_col, self.x_sim[:, :1]))
    t_sim_cal = self.x_sim[:, 1:]
    xt_sim_sep = [np.array(0.5).reshape(1, 1)] + self.x_sim_kron

    col_mean = np.mean(self.y_sim, axis=0).reshape(1, -1)
    col_sd = np.std(self.y_sim, axis=0, ddof=1).reshape(1, -1)
    y_sim_std = (self.y_sim - col_mean) / col_sd

    # Arguments shared by the regular and the separable data objects
    shared_args = dict(y_sim=y_sim_std, x_obs=x_obs, y_obs=y_obs,
                       y_ind_sim=self.y_ind_sim, y_ind_obs=self.y_ind_sim)

    regular_model = build_model(
        SepiaData(x_sim=x_sim_cal, t_sim=t_sim_cal, **shared_args))
    print('Calibration model LL=%f' % compute_log_lik(regular_model))

    sep_model = build_model(
        SepiaData(xt_sim_sep=xt_sim_sep, **shared_args))
    print('Calibration Sep model LL=%f' % compute_log_lik(sep_model))

    self.assertAlmostEqual(compute_log_lik(regular_model),
                           compute_log_lik(sep_model), places=5)

    print('Running MCMC on Calibration model in Sep pred mode, regular design')
    reg_mu, reg_sigma = sample_and_predict(regular_model)

    print('Running MCMC on Calibration model in Sep pred mode, separable design')
    sep_mu, sep_sigma = sample_and_predict(sep_model)

    mu_gap = np.max(abs(reg_mu - sep_mu))
    print('testing max difference which is %g' % mu_gap)
    self.assertAlmostEqual(0, mu_gap)
    sigma_gap = np.max(abs(reg_sigma - sep_sigma))
    print('testing max difference which is %g' % sigma_gap)
    self.assertAlmostEqual(0, sigma_gap)

    ###### Timing for predictions
    test_x_pred = np.random.rand(10, 2)

    reg_samples = regular_model.get_samples()
    timed_prediction('predict time non-Sep in non-Sep mode %f', 'notSep',
                     regular_model, reg_samples, test_x_pred)
    timed_prediction('predict time non-sep in Sep mode %f', 'Sep',
                     regular_model, reg_samples, test_x_pred)

    sep_samples = sep_model.get_samples()
    timed_prediction('predict time Sep %f', 'Sep',
                     sep_model, sep_samples, test_x_pred)
print('make sure matlab.engine installed') y_sim = matfile['y'].T y_ind_sim = matfile['y_ind'].squeeze() xt_sim = matfile['x'] y_obs = matfile['y_obs'] y_ind_obs = matfile['y_ind_obs'].squeeze() x_obs = matfile['x_obs'] data = SepiaData(x_sim=xt_sim[:, 0][:, None], t_sim=xt_sim[:, 1][:, None], y_sim=y_sim, y_ind_sim=y_ind_sim, x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs) data.standardize_y() data.transform_xt() data.create_K_basis(n_pc=2) data.create_D_basis(D_obs=matfile['Dobs'].T) print(data) model = setup_model(data) nsamp = int(matfile['nsamp']) nburn = int(matfile['nburn']) @timeit def run_mcmc(): model.do_mcmc(nburn + nsamp)
def test_multivariate_sim_and_obs_setup(self):
    """
    Tests setup for multivariate sim and obs model with D.

    Builds a SepiaData object from ``self.data_dict`` (assumed to be
    populated by the test fixture, which is not visible from this method).
    A model is first set up from an untouched deep copy of the data; the
    original is then transformed explicitly, and the two models are checked
    to hold the same standardized/transformed values.  Finally the model
    dimensions and the shape, prior and MCMC step type of every parameter
    are checked.
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'],
                  y_sim=self.data_dict['y_sim'],
                  y_ind_sim=self.data_dict['y_ind_sim'],
                  y_obs=self.data_dict['y_obs'],
                  y_ind_obs=self.data_dict['y_ind_obs'])
    print('Testing multivariate sim and obs SepiaModelSetup with discrep...',
          flush=True)
    print(d, flush=True)

    # Try it without doing standardization/transform/pc basis to be sure it doesn't break
    model_notrans = setup_model(copy.deepcopy(d))

    # Do explicit transformation
    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(n_pc=5)
    # Exercise the built-in D basis types before installing the custom one
    d.create_D_basis(type='constant')
    d.create_D_basis(type='linear')
    custom_D = np.vstack(
        [np.ones(d.obs_data.y.shape[1]), d.obs_data.y_ind])
    d.create_D_basis(D_obs=custom_D)
    model = setup_model(d)

    # Check that either way gives same transformation
    compared_attrs = {
        'sim_data': ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'),
        'obs_data': ('orig_y_mean', 'orig_y_sd', 'y_std'),
    }
    for part, attrs in compared_attrs.items():
        implicit = getattr(model_notrans.data, part)
        explicit = getattr(model.data, part)
        for attr in attrs:
            self.assertTrue(
                np.allclose(getattr(implicit, attr),
                            getattr(explicit, attr)),
                '%s.%s' % (part, attr))

    # Check model components are set up as expected
    num = model.num
    self.assertFalse(num.scalar_out)
    self.assertFalse(num.sim_only)
    self.assertEqual(num.m, 100)
    self.assertEqual(num.n, 1)
    self.assertEqual(num.p, 1)
    self.assertEqual(num.q, 3)
    self.assertEqual(num.pu, 5)
    self.assertEqual(num.pv, 2)
    #self.assertTrue(np.allclose(model.num.w, model.data.sim_data.y_std)) # TODO compute projection
    #self.assertTrue(np.allclose(model.num.u, model.data.obs_data.y_std)) # TODO compute projection
    #self.assertTrue(np.allclose(model.num.v, model.data.obs_data.y_std)) # TODO compute projection

    # Check parameter setup: name -> (val_shape, prior dist, MCMC step type)
    expected_params = {
        'betaU': ((num.q + num.p, num.pu), 'Beta', 'BetaRho'),
        'betaV': ((1, 1), 'Beta', 'BetaRho'),
        'lamUz': ((1, num.pu), 'Gamma', 'PropMH'),
        'lamVz': ((1, 1), 'Gamma', 'PropMH'),
        'lamWOs': ((1, 1), 'Gamma', 'PropMH'),
        'lamWs': ((1, num.pu), 'Gamma', 'PropMH'),
        'lamOs': ((1, 1), 'Gamma', 'PropMH'),
        'theta': ((1, num.q), 'Normal', 'Uniform'),
    }
    for name, (val_shape, prior_dist, step_type) in expected_params.items():
        param = getattr(model.params, name)
        # The parameter name is passed as msg so a failure says which one.
        self.assertEqual(param.val_shape, val_shape, name)
        self.assertEqual(param.prior.dist, prior_dist, name)
        self.assertEqual(param.mcmc.stepType, step_type, name)

    # theta additionally records its original range
    theta = model.params.theta
    self.assertTrue(np.allclose(theta.orig_range[0], 0))
    self.assertTrue(np.allclose(theta.orig_range[1], 1))

    # Exactly the parameters above should be scheduled for MCMC sampling
    mcmc_list_names = {p.name for p in model.params.mcmcList}
    self.assertEqual(mcmc_list_names, set(expected_params))