def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Build the univariate and multivariate SepiaModel fixtures used by the tests.

    For each of ``n_hier`` (3) iterations, one multivariate and one univariate
    data set are generated with ``generate_data``, wrapped in ``SepiaData``,
    transformed/standardized, and finally turned into ``SepiaModel`` objects
    stored in ``self.multi_model_list`` and ``self.univ_model_list``.

    Every iteration passes the same arguments (including ``seed``) to the
    generators.

    :param m: forwarded to both data generators
    :param n: forwarded to both data generators
    :param nt_sim: forwarded to the multivariate generator only
    :param nt_obs: forwarded to the multivariate generator only
    :param n_theta: forwarded to the multivariate generator only
    :param n_basis: forwarded to the multivariate generator and used as the
        number of components requested from ``create_K_basis``
    :param sig_n: forwarded to both data generators
    :param seed: forwarded to both data generators
    """
    n_hier = 3
    self.hier_idx = np.array([[0, 0, 0]])
    #self.hier_idx = np.array([[1, 1, 1], [2, -1, 2]]) # TODO this fails for multivariate; cant use for univariate now

    multi_data_list = []
    univ_data_list = []
    for _ in range(n_hier):
        multi_data_dict = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
            n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ_data_dict = generate_data.generate_univ_sim_and_obs(
            m=m, n=n, sig_n=sig_n, seed=seed)

        # Univariate (scalar output) data set.
        d = SepiaData(t_sim=univ_data_dict['t_sim'],
                      y_sim=univ_data_dict['y_sim'],
                      y_obs=univ_data_dict['y_obs'])
        d.transform_xt()
        d.standardize_y()
        univ_data_list.append(d)

        # Multivariate data set; needs K and D bases in addition.
        d = SepiaData(t_sim=multi_data_dict['t_sim'],
                      y_sim=multi_data_dict['y_sim'],
                      y_ind_sim=multi_data_dict['y_ind_sim'],
                      y_obs=multi_data_dict['y_obs'],
                      y_ind_obs=multi_data_dict['y_ind_obs'])
        d.transform_xt()
        d.standardize_y()
        # Use the same basis size the data were generated with, instead of a
        # hard-coded 5 that only matched the default value of n_basis.
        d.create_K_basis(n_basis)
        d.create_D_basis('constant')
        multi_data_list.append(d)

    self.univ_model_list = [SepiaModel(d) for d in univ_data_list]
    self.multi_model_list = [SepiaModel(d) for d in multi_data_list]
def test_univariate_sim_and_obs(self):
    """
    Tests univariate sim and obs where we pass in both x and t.

    Builds a SepiaData object from random scalar-output simulation and
    observation data, then checks the sim/obs flags, the rescaling done by
    transform_xt (default range and an explicit [-10, 10] range), the
    statistics recorded by standardize_y, and that the K and D basis
    creation calls run.
    """
    m = 700  # number of simulated observations
    p = 3  # dimension of x (sim/obs inputs)
    q = 2  # dimension of t (extra sim inputs)
    n = 5  # number of observed observations

    x_sim = np.random.uniform(-1, 3, (m, p))
    t = np.random.uniform(-10, 10, (m, q))
    x_obs = np.random.uniform(-1.5, 3.5, (n, p))
    y_sim = 5 * np.random.normal(0, 1, m) + 2
    y_obs = 5 * np.random.normal(0, 1, n) + 1

    # The mean of m draws with sd 5 has standard error 5 / sqrt(m) (~0.19 for
    # m = 700). The previous tolerance (rtol=0.1 around 2, i.e. 0.2) was only
    # about one standard error, so an unseeded run failed roughly 30% of the
    # time. Four standard errors keeps the check meaningful without flakiness.
    # NOTE(review): assumes orig_y_mean is the sample mean of y_sim - confirm.
    mean_tol = 4 * 5 / np.sqrt(m)

    d = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=t, x_obs=x_obs, y_obs=y_obs)

    print('Testing univariate sim and obs SepiaData...')
    print(d)

    self.assertTrue(d.obs_data is not None)
    self.assertTrue(not d.sim_only)
    self.assertTrue(d.scalar_out)

    # Default transform: every column of x and t should span exactly [0, 1].
    d.transform_xt()
    self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == 1))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == 0))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 1))

    # Explicit range: every column should span exactly [-10, 10].
    d.transform_xt(-10, 10)
    self.assertTrue(np.all(np.min(d.sim_data.x_trans, 0) == -10))
    self.assertTrue(np.all(np.max(d.sim_data.x_trans, 0) == 10))
    self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == -10))
    self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 10))

    # With centering and scaling disabled, y_std must equal y.
    d.standardize_y(center=False, scale=False)
    self.assertEqual(d.sim_data.orig_y_sd, 1)
    self.assertEqual(d.sim_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))
    self.assertEqual(d.obs_data.orig_y_sd, 1)
    self.assertEqual(d.obs_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.obs_data.y, d.obs_data.y_std))

    d.standardize_y(scale='columnwise')
    self.assertTrue(np.allclose(d.sim_data.orig_y_sd, 5, rtol=0.1))
    self.assertTrue(np.allclose(d.sim_data.orig_y_mean, 2, rtol=0, atol=mean_tol))
    # rtol is relative to the reference value 0 here, so only allclose's
    # default absolute tolerance actually applies to the mean check.
    self.assertTrue(np.allclose(np.mean(d.sim_data.y_std, 0), 0, rtol=0.1))
    self.assertTrue(np.allclose(np.std(d.sim_data.y_std, 0), 1, rtol=0.1))
    self.assertTrue(d.sim_data.y.shape == d.sim_data.y_std.shape)
    # The obs statistics are compared with the sim values (sd 5, mean 2), as
    # before. Presumably obs data are standardized with sim statistics.
    self.assertTrue(np.allclose(d.obs_data.orig_y_sd, 5, rtol=0.1))
    self.assertTrue(np.allclose(d.obs_data.orig_y_mean, 2, rtol=0, atol=mean_tol))
    self.assertTrue(d.obs_data.y.shape == d.obs_data.y_std.shape)

    d.create_K_basis(10)
    d.create_D_basis()
def test_multivariate_sim_only_x_only(self):
    """
    Tests setup for multivariate sim only where we only use an x input, not t.

    Synthetic outputs are a fixed mean curve log(1 + y_ind) plus random
    combinations of three basis curves. The test then exercises the SepiaData
    flags, transform_xt, standardize_y and the K/D basis creation.
    """
    m = 700  # number of simulated observations
    p = 3  # dimension of x (simulation inputs)
    ell = 1000  # dimension of y output
    pu = 3  # number of PCs

    y_ind = np.linspace(0, 100, ell)

    # Rows of K_true are the three basis curves evaluated on y_ind.
    sine_curve = 0.5 * (np.sin(y_ind) + 1)
    quad_curve = np.square(-y_ind + 50) / 2500
    line_curve = y_ind / 100
    K_true = np.vstack([sine_curve, quad_curve, line_curve])

    # Random weights for each basis curve, one column per simulation.
    weight_scale = 2 * np.array([1, 0.5, 0.2])[:, None]
    weights = weight_scale * np.random.normal(0, 1, (pu, m))
    mean_curve = np.log(1 + y_ind)
    y = (mean_curve[:, None] + np.dot(K_true.T, weights)).T

    x = 0.5 * np.random.uniform(-1, 3, (m, p))

    d = SepiaData(x_sim=x, y_sim=y, t_sim=None, y_ind_sim=y_ind)

    print('Testing multivariate sim-only SepiaData...')
    print(d)

    self.assertIsNone(d.obs_data)
    self.assertTrue(d.sim_only)
    self.assertFalse(d.scalar_out)

    # Default transform: each x column spans exactly [0, 1].
    d.transform_xt()
    x_lo = np.min(d.sim_data.x_trans, 0)
    x_hi = np.max(d.sim_data.x_trans, 0)
    self.assertTrue(np.all(x_lo == 0))
    self.assertTrue(np.all(x_hi == 1))

    # Explicit range: each x column spans exactly [-10, 10].
    d.transform_xt(-10, 10)
    x_lo = np.min(d.sim_data.x_trans, 0)
    x_hi = np.max(d.sim_data.x_trans, 0)
    self.assertTrue(np.all(x_lo == -10))
    self.assertTrue(np.all(x_hi == 10))

    # No centering or scaling: standardized y is just y.
    d.standardize_y(center=False, scale=False)
    self.assertEqual(d.sim_data.orig_y_sd, 1)
    self.assertEqual(d.sim_data.orig_y_mean, 0)
    self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))

    d.standardize_y(scale='columnwise')
    mean_matches = np.allclose(d.sim_data.orig_y_mean, mean_curve,
                               rtol=0.1, atol=0.5)
    self.assertTrue(mean_matches)
    y_std = d.sim_data.y_std
    self.assertTrue(np.allclose(np.std(y_std, 0), 1, rtol=0.1))
    self.assertTrue(np.allclose(np.mean(y_std, 0), 0, rtol=0.1))
    self.assertEqual(d.sim_data.y.shape, y_std.shape)

    d.create_K_basis(3)
    self.assertEqual(d.sim_data.K.shape, (pu, ell))
    d.create_D_basis()
    print(d)
def test_predict_multi_sim_only(self):
    """
    Compare Python predictions with stored MATLAB results (multivariate, sim only).

    Reference data are loaded from ``data/multi_sim_only_mcmc_test.mat`` next
    to this file. If that file is missing, the MATLAB engine is started to
    generate it; if that fails for any reason the test is skipped, because
    there is nothing to compare against.

    The Python model is then set up on the same data, MCMC is run for
    ``nburn + nsamp`` iterations, and ``wPred`` output (realizations, means
    and sigmas) is compared with the MATLAB values, once without and once
    with ``addResidVar=True``.
    """
    n_pc = 2
    seed = 42
    lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
    list_to_sample = [1, 2, 3, 4]  # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
    nsamp = 100
    nburn = 10

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
        matfile = scipy.io.loadmat(mat_fn)
    else:
        print('Generating matfile multi_sim_only_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        try:
            eng = matlab.engine.start_matlab()
            try:
                eng.cd(script_path)
                eng.addpath('matlab/', nargout=0)
                eng.multi_sim_only_mcmc_test(nsamp, nburn, list_to_sample,
                                             seed, lamWOs_init, n_pc,
                                             nargout=0)
            finally:
                # Always shut the engine down, even if a MATLAB call failed.
                eng.quit()
            matfile = scipy.io.loadmat(mat_fn)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Without reference data the comparisons below cannot run; skip
            # explicitly rather than crash later on an undefined `matfile`.
            self.skipTest('MATLAB reference data unavailable: %s' % e)

    y = matfile['y'].T
    y_ind = matfile['y_ind'].T
    x = matfile['x']

    data = SepiaData(x_sim=x[:, 0][:, None], t_sim=x[:, 1][:, None],
                     y_sim=y, y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=n_pc)
    print(data)

    np.random.seed(int(seed))
    model = setup_model(data)
    if lamWOs_init > 0:
        model.params.lamWOs.val = np.array([[lamWOs_init]])
    # Restrict sampling to the requested parameters (1-based -> 0-based).
    model.params.mcmcList = [model.params.mcmcList[i - 1] for i in list_to_sample]

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()

    print('Python mcmc time %0.3g s' % (t_end - t_start))
    # Convert explicitly to a scalar before formatting.
    # NOTE(review): presumably loadmat returns mcmc_time as a 1x1 array - confirm.
    print('Matlab mcmc time %0.3g s' % float(np.squeeze(matfile['mcmc_time'])))

    np.random.seed(seed)
    psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)

    def report_and_check(pred, key):
        """Print pred next to matfile[key + '_w' / '_Myhat' / '_Syhat'] and assert they agree."""
        # NOTE(review): the (10, 2) reshape is hard-coded to match the stored
        # MATLAB sigma layout - confirm if the sample set changes.
        sigma = pred.sigma.squeeze().reshape(10, 2).T
        print('Samples are:')
        print(pred.w.squeeze())
        print('Matlab Samples are:')
        print(matfile[key + '_w'].squeeze())
        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile[key + '_Myhat'])
        print('Sigma are:')
        print(sigma)
        print('Matlab Sigma are:')
        print(matfile[key + '_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(np.allclose(matfile[key + '_w'].squeeze(), pred.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(np.allclose(matfile[key + '_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(np.allclose(matfile[key + '_Syhat'].squeeze(), sigma))

    pred = wPred([0.5, 0.5], psamps, model.num, model.data, returnMuSigma=True)
    report_and_check(pred, 'pred')

    pred_arv = wPred([0.5, 0.5], psamps, model.num, model.data,
                     addResidVar=True, returnMuSigma=True)
    print('Add Residual Variance test')
    report_and_check(pred_arv, 'pred_arv')

    print('Done.')