def test_univariate_sim_only_setup(self):
    """
    Tests setup for univariate sim only model.

    The model is built twice -- once from untransformed data and once after
    explicit transform_xt/standardize_y calls -- and both must end up with the
    same transformed data. The model dimensions and the shape, prior and step
    type of every sampled parameter are then checked.
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'])
    print('Testing univariate sim-only SepiaModelSetup...', flush=True)
    print(d, flush=True)

    # Try it without doing standardization/transform to be sure it doesn't break
    model_notrans = setup_model(copy.deepcopy(d))

    # Do explicit transformation
    d.transform_xt()
    d.standardize_y()
    model = setup_model(d)

    # Check that either way gives same transformation
    for attr in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
        self.assertTrue(
            np.allclose(getattr(model_notrans.data.sim_data, attr),
                        getattr(model.data.sim_data, attr)),
            msg='sim_data.%s differs between the two setups' % attr)

    # Check model components are set up as expected.
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertTrue(model.num.scalar_out)
    self.assertTrue(model.num.sim_only)
    self.assertEqual(model.num.m, 100)
    self.assertEqual(model.num.n, 0)
    self.assertEqual(model.num.p, 1)
    self.assertEqual(model.num.q, 1)
    self.assertEqual(model.num.pu, 1)
    self.assertEqual(model.num.pv, 0)
    self.assertTrue(np.allclose(model.num.w, model.data.sim_data.y_std))

    # Check parameter setup: name -> (val_shape, prior dist, mcmc step type)
    expected = {
        'betaU': ((2, 1), 'Beta', 'BetaRho'),
        'lamUz': ((1, 1), 'Gamma', 'PropMH'),
        'lamWOs': ((1, 1), 'Gamma', 'PropMH'),
        'lamWs': ((1, 1), 'Gamma', 'PropMH'),
    }
    for name, (val_shape, dist, step_type) in expected.items():
        param = getattr(model.params, name)
        self.assertEqual(param.val_shape, val_shape, msg=name)
        self.assertEqual(param.prior.dist, dist, msg=name)
        self.assertEqual(param.mcmc.stepType, step_type, msg=name)

    # The sampled parameters must be exactly the ones checked above
    mcmc_list_names = [p.name for p in model.params.mcmcList]
    self.assertEqual(set(mcmc_list_names), set(expected))
def setup_univ_sim_and_obs(m=100, n=50, seed=42., n_lik=0, n_mcmc=0, n_pred=0):
    """
    Run the Matlab routine ``setup_univ_sim_and_obs`` and build the matching
    Python model from its output.

    :param m: forwarded to the Matlab routine
    :param n: forwarded to the Matlab routine; also the row count used to
              reshape ``x_obs`` to (n, 1)
    :param seed: forwarded to the Matlab routine
    :param n_lik: forwarded to the Matlab routine
    :param n_mcmc: forwarded to the Matlab routine
    :param n_pred: forwarded to the Matlab routine
    :return: tuple (model returned by setup_model, raw Matlab result ``res``)
    :raises Exception: any error from starting or calling Matlab is printed
                       and then re-raised
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_and_obs(m, n, seed, n_lik, n_mcmc, n_pred, nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        # Without a result there is nothing to build; re-raise so the real cause
        # is reported instead of a later, unrelated error on the undefined `res`.
        raise
    finally:
        # Shut the engine down on failure as well as on success
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    x_obs = np.array(res['x_obs'], dtype=float).reshape((n, 1))

    # Column 0 of xt is used as x, column 1 as t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None], y_sim=y,
                     x_obs=x_obs, y_obs=y_obs)
    data.standardize_y()
    data.transform_xt()
    print(data)
    model = setup_model(data)
    return model, res
def setup_multi_sim_and_obs_sharedtheta(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42.,
                                        n_lik=0, n_mcmc=0, n_pred=0, n_shared=2, clist=None, fix_K=False):
    """
    Run the Matlab routine ``setup_multi_sim_and_obs_sharedtheta`` and build
    one Python model per shared data set from its output.

    :param m, n, nt_sim, nt_obs, noise_sd, nx, seed, n_lik, n_mcmc, n_pred:
           forwarded to the Matlab routine
    :param n_pc: forwarded to the Matlab routine and also passed to
                 create_K_basis for every model
    :param n_shared: forwarded to the Matlab routine; number of models built
    :param clist: converted with matlab.double and forwarded; None (the
                  default) is treated as an empty list
    :param fix_K: accepted but not used by this function
    :return: tuple (list of models from setup_model, raw Matlab result ``res``)
    :raises Exception: any error from starting or calling Matlab is printed
                       and then re-raised
    """
    # None replaces the former mutable default `[]`; behavior is the same
    clist = [] if clist is None else clist

    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_sharedtheta(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik,
                                                      n_mcmc, n_pred, n_shared, matlab.double(clist), nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        # Re-raise so the real cause is reported instead of a later, unrelated
        # error on the undefined `res`.
        raise
    finally:
        # Shut the engine down on failure as well as on success
        if eng is not None:
            eng.quit()

    # Shape notes below are the original author's
    y = np.array(res['y'], dtype=float)  # (m, nt_sim, n_shared)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()  # (nt_sim, n_shared)
    xt = np.array(res['xt'], dtype=float)  # (m, nx, n_shared)
    # NOTE(review): original note says (n, nt_sim, n_shared); nt_obs seems more likely -- confirm
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()  # (nt_obs, n_shared)
    x_obs = np.array(res['x_obs'], dtype=float)  # (n, 1, n_shared)

    model_list = []
    for i in range(n_shared):
        # Slice i along the last axis; column 0 of xt is x, the rest is t
        data = SepiaData(x_sim=xt[:, 0, i][:, None], t_sim=xt[:, 1:, i], y_sim=y[:, :, i], y_ind_sim=y_ind[:, i],
                         x_obs=x_obs[:, :, i], y_obs=y_obs[:, :, i], y_ind_obs=y_ind_obs[:, i])
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc)
        model_list.append(setup_model(data))
    return model_list, res
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Build three univariate and three multivariate sim-and-obs models, stored
    as self.univ_model_list and self.multi_model_list, plus self.hier_idx.

    Every repetition generates its data with the same arguments (including
    the same seed).
    """
    n_hier = 3
    self.hier_idx = np.array([[0, 0, 0]])
    # TODO alternative index np.array([[1, 1, 1], [2, -1, 2]]) fails for
    # multivariate; cant use for univariate now

    multi_kwargs = dict(m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
                        n_basis=n_basis, sig_n=sig_n, seed=seed)
    univ_kwargs = dict(m=m, n=n, sig_n=sig_n, seed=seed)

    univ_data_list, multi_data_list = [], []
    for _ in range(n_hier):
        multi_src = generate_data.generate_multi_sim_and_obs(**multi_kwargs)
        univ_src = generate_data.generate_univ_sim_and_obs(**univ_kwargs)

        # Univariate data: transform inputs and standardize outputs
        univ_data = SepiaData(t_sim=univ_src['t_sim'],
                              y_sim=univ_src['y_sim'],
                              y_obs=univ_src['y_obs'])
        univ_data.transform_xt()
        univ_data.standardize_y()
        univ_data_list.append(univ_data)

        # Multivariate data: additionally needs K and D bases
        multi_data = SepiaData(t_sim=multi_src['t_sim'],
                               y_sim=multi_src['y_sim'],
                               y_ind_sim=multi_src['y_ind_sim'],
                               y_obs=multi_src['y_obs'],
                               y_ind_obs=multi_src['y_ind_obs'])
        multi_data.transform_xt()
        multi_data.standardize_y()
        multi_data.create_K_basis(5)
        multi_data.create_D_basis('constant')
        multi_data_list.append(multi_data)

    self.univ_model_list = list(map(setup_model, univ_data_list))
    self.multi_model_list = list(map(setup_model, multi_data_list))
def setup_multi_sim_and_obs(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42., n_lik=0,
                            n_mcmc=0, n_pred=0, fix_K=False):
    """
    Run the Matlab routine ``setup_multi_sim_and_obs`` and build the matching
    Python model (with a constant D basis) from its output.

    :param m, n, nt_sim, nt_obs, noise_sd, nx, seed, n_lik, n_mcmc, n_pred:
           forwarded to the Matlab routine
    :param n_pc: forwarded to the Matlab routine and passed to create_K_basis
    :param fix_K: if True, pass the transposed Matlab ``K`` to create_K_basis
                  instead of letting it compute one
    :return: tuple (model returned by setup_model, raw Matlab result ``res``)
    :raises Exception: any error from starting or calling Matlab is printed
                       and then re-raised
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc, n_pred,
                                          nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        # Re-raise so the real cause is reported instead of a later, unrelated
        # error on the undefined `res`.
        raise
    finally:
        # Shut the engine down on failure as well as on success
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)

    # Column 0 of xt is used as x, the remaining columns as t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    if fix_K:
        # means use the K from matlab - avoid issues with positive/negative component ambiguity
        data.create_K_basis(n_pc, K=np.array(res['K']).T)
    else:
        data.create_K_basis(n_pc)
    data.create_D_basis('constant')
    print(data)
    model = setup_model(data)
    return model, res
def create_test_case():
    """
    Generate a small random multivariate sim/obs data set, build a model from
    it, and save both for comparison against Matlab.

    Writes 'data/test_case_matlab.mat' (inputs, bases and standardized
    outputs) and 'data/test_case_python_model.pkl' (the model and data).
    """
    n_obs, n_sim = 2, 5
    p, q = 3, 4
    ell_sim, ell_obs = 80, 20

    def random_curves(n_rows, grid):
        # Random quadratic + linear + constant curves evaluated on `grid`;
        # the three coefficient draws happen in this fixed order.
        quad = np.random.normal(0, 1, (n_rows, 1))
        lin = np.random.normal(0, 1, (n_rows, 1))
        const = np.random.normal(0, 1, (n_rows, 1))
        return 10 * quad * (grid[None, :] - 50)**2 / 75. + 20 * lin * grid[None, :] + 20 * const

    # Simulation inputs and outputs
    t = np.random.uniform(0, 1, (n_sim, q))
    x = 0.5 * np.ones((n_sim, p))
    y_ind = np.linspace(0, 100, ell_sim)
    y = random_curves(n_sim, y_ind)

    # Observed inputs and outputs
    x_obs = 0.5 * np.ones((n_obs, p))
    y_obs_ind = np.linspace(10, 85, ell_obs)
    y_obs = random_curves(n_obs, y_obs_ind)

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_obs_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=3)
    data.create_D_basis('constant')

    # Save as matfile for testing in matlab
    mat_contents = dict([
        ('t', t),
        ('y', y),
        ('y_obs', y_obs),
        ('D', data.obs_data.D),
        ('Kobs', data.obs_data.K),
        ('Ksim', data.sim_data.K),
        ('y_obs_std', data.obs_data.y_std),
        ('y_sim_std', data.sim_data.y_std),
        ('y_sd', data.sim_data.orig_y_sd),
    ])
    scipy.io.savemat('data/test_case_matlab.mat', mat_contents)

    g = setup_model(data)

    # Save pickle file of results
    with open('data/test_case_python_model.pkl', 'wb') as f:
        pickle.dump({'model': g, 'data': data}, f)
def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
    """
    Build the five model fixtures used by the tests: univariate sim-only,
    univariate sim-and-obs, multivariate sim-only, multivariate sim-and-obs
    without a D basis, and multivariate sim-and-obs with a linear D basis.
    """
    multi_src = generate_data.generate_multi_sim_and_obs(
        m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs, n_theta=n_theta,
        n_basis=n_basis, sig_n=sig_n, seed=seed)
    univ_src = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

    def build(source, fields, n_pc=None, d_basis=None):
        # Make SepiaData from the named fields, transform/standardize it,
        # optionally add K and D bases, and return the resulting model.
        sepia_data = SepiaData(**{key: source[key] for key in fields})
        sepia_data.transform_xt()
        sepia_data.standardize_y()
        if n_pc is not None:
            sepia_data.create_K_basis(n_pc)
        if d_basis is not None:
            sepia_data.create_D_basis(d_basis)
        return setup_model(sepia_data)

    sim_fields = ('t_sim', 'y_sim')
    multi_sim_fields = sim_fields + ('y_ind_sim',)
    multi_all_fields = multi_sim_fields + ('y_obs', 'y_ind_obs')

    self.univ_sim_only_model = build(univ_src, sim_fields)
    self.univ_sim_and_obs_model = build(univ_src, sim_fields + ('y_obs',))
    self.multi_sim_only_model = build(multi_src, multi_sim_fields, n_pc=5)
    self.multi_sim_and_obs_noD_model = build(multi_src, multi_all_fields, n_pc=5)
    self.multi_sim_and_obs_model = build(multi_src, multi_all_fields, n_pc=5, d_basis='linear')
def test_multivariate_sim_and_obs_ragged_setup(self):
    """
    Checks that model setup runs on multivariate data whose observations are
    ragged, i.e. each observation has its own number of output indices.
    """
    m = 700         # number of simulated observations
    p = 3           # dimension of x (simulation inputs)
    ell_sim = 1000  # dimension of y output sim
    pu = 3          # number of PCs
    q = 2           # dimension of t (extra sim inputs)
    n = 5           # number of observed observations

    def true_basis(grid):
        # The three basis curves evaluated on `grid`, stacked row-wise
        return np.vstack([0.5 * (np.sin(grid) + 1),
                          np.square(-grid + 50) / 2500,
                          grid / 100])

    # A different output length for every observation
    ell_obs = np.random.randint(100, 600, n)

    # Simulated outputs: log trend plus randomly weighted basis curves
    y_ind_sim = np.linspace(0, 100, ell_sim)
    K_true_sim = true_basis(y_ind_sim)
    sim_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, m))
    y_sim = (np.log(1 + y_ind_sim)[:, None] + np.dot(K_true_sim.T, sim_weights)).T
    x_sim = 0.5 * np.random.uniform(-1, 3, (m, p))
    t = np.random.uniform(-10, 10, (m, q))

    # Jittered, non-negative index grids, one per observation
    y_ind_obs = []
    for length in ell_obs:
        grid = np.linspace(0, 100, length) + np.random.uniform(-3, 3, length)
        y_ind_obs.append(grid)
    for grid in y_ind_obs:
        grid[grid < 0] = 0

    K_true_obs = [true_basis(grid) for grid in y_ind_obs]

    # Observed outputs: same construction as the sims, shifted by 10
    y_obs = []
    for grid, basis in zip(y_ind_obs, K_true_obs):
        obs_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, 1))
        y_obs.append(10 + np.squeeze(np.log(1 + grid)[:, None] + np.dot(basis.T, obs_weights)))
    x_obs = 0.5 * np.random.uniform(-1, 3, (n, p))

    d = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=t, y_ind_sim=y_ind_sim,
                  x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    model = setup_model(d)
def setup_multi_sim_and_obs_noD(m=100, n=10, nt_sim=20, nt_obs=15, noise_sd=0.1, nx=5, n_pc=10, seed=42., n_lik=0,
                                n_mcmc=0):
    """
    Run the Matlab routine ``setup_multi_sim_and_obs_noD`` and build the
    matching Python model (K basis only, no D basis) from its output.

    :param m, n, nt_sim, nt_obs, noise_sd, nx, seed, n_lik, n_mcmc:
           forwarded to the Matlab routine
    :param n_pc: forwarded to the Matlab routine and passed to create_K_basis
    :return: tuple (model returned by setup_model, raw Matlab result ``res``)
    :raises Exception: any error from starting or calling Matlab is printed
                       and then re-raised
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_noD(m, n, nt_sim, nt_obs, noise_sd, nx, n_pc, seed, n_lik, n_mcmc,
                                              nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        # Re-raise so the real cause is reported instead of a later, unrelated
        # error on the undefined `res`.
        raise
    finally:
        # Shut the engine down on failure as well as on success
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)

    # Column 0 of xt is used as x, the remaining columns as t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    print(data)
    model = setup_model(data)
    return model, res
def test_univariate_sim_only_lik(self):
    """
    Tests log lik for univariate sim only model

    Calls logLik once with no arguments, then once per element of every
    parameter in the mcmc list; passes if none of the calls raise.
    """
    source = self.univ_data_dict
    sepia_data = SepiaData(t_sim=source['t_sim'], y_sim=source['y_sim'])

    print('Testing univariate sim-only SepiaLogLik...', flush=True)
    print(sepia_data, flush=True)

    sepia_data.transform_xt()
    sepia_data.standardize_y()

    model = setup_model(sepia_data)

    # Full evaluation first
    model.logLik()

    # Then one evaluation per (parameter, flat element index)
    for param in model.params.mcmcList:
        n_elements = int(np.prod(param.val_shape))
        for flat_index in range(n_elements):
            model.logLik(cvar=param.name, cindex=flat_index)
def setup_multi_sim_only(m=300, nt=20, nx=5, n_pc=10, seed=42., n_lik=0, n_mcmc=0, n_pred=0, fix_K=False):
    """
    Run the Matlab routine ``setup_multi_sim_only`` and build the matching
    Python sim-only model from its output.

    :param m, nt, nx, seed, n_lik, n_mcmc, n_pred: forwarded to the Matlab routine
    :param n_pc: forwarded to the Matlab routine and passed to create_K_basis
    :param fix_K: if True, overwrite data.sim_data.K with the transposed
                  Matlab ``K`` after create_K_basis has run
    :return: tuple (model returned by setup_model, raw Matlab result ``res``)
    :raises Exception: any error from starting or calling Matlab is printed
                       and then re-raised
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_only(m, nt, nx, n_pc, seed, n_lik, n_mcmc, n_pred, nargout=1)
    except Exception as e:
        print(e)
        print('Matlab error; make sure matlab.engine installed, check Matlab code for errors.')
        # Re-raise so the real cause is reported instead of a later, unrelated
        # error on the undefined `res`.
        raise
    finally:
        # Shut the engine down on failure as well as on success
        if eng is not None:
            eng.quit()

    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)

    # Column 0 of xt is used as x, the remaining columns as t
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1:], y_sim=y, y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    if fix_K:
        # Replace the computed basis with the one Matlab used
        data.sim_data.K = np.array(res['K']).T
    print(data)
    model = setup_model(data)
    return model, res
def test_multivariate_sim_and_obs_noD_lik(self):
    """
    Tests log lik for multivariate sim and obs model no discrep

    Calls logLik once with no arguments, then once per element of every
    parameter in the mcmc list; passes if none of the calls raise.
    """
    d = SepiaData(t_sim=self.multi_data_dict['t_sim'], y_sim=self.multi_data_dict['y_sim'],
                  y_ind_sim=self.multi_data_dict['y_ind_sim'], y_obs=self.multi_data_dict['y_obs'],
                  y_ind_obs=self.multi_data_dict['y_ind_obs'])

    # Message previously said "sim-only", copied from another test
    print('Testing multivariate sim and obs (no discrepancy) SepiaLogLik...', flush=True)
    print(d, flush=True)

    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(5)  # K basis only; no D basis is created in this test

    model = setup_model(d)

    model.logLik()

    for param in model.params.mcmcList:
        for cindex in range(int(np.prod(param.val_shape))):
            model.logLik(cvar=param.name, cindex=cindex)
plt.xlabel('y index') plt.ylabel('y') plt.show() #%% data.transform_xt() data.standardize_y('columnwise') data.create_K_basis(5) data.create_D_basis(type='linear') print(data) #%% model = setup_model(data) #%% cachefile_name = 'multivariate_example_with_prediction.pkl' import os.path import pickle use_save_file = False if use_save_file and os.path.isfile(cachefile_name): model = pickle.load(open(cachefile_name, "rb")) else: model.tune_step_sizes(50, 20) model.do_mcmc(1000) if use_save_file:
x_sim = sim_data[:,0:2] # x = {R, rho_ball} t_sim = sim_data[:,2:4] # t = {C, g} with open(datadir+'simHeights101x1','r') as f: h_sim = np.loadtxt(f) with open(datadir+'sims101x80Cg.txt','r') as f: y_sim = np.loadtxt(f).T # create sepia data object data = SepiaData(x_sim = x_sim, t_sim = t_sim, y_ind_sim = h_sim, y_sim = y_sim,\ x_obs = x_obs, y_obs = y_obs, y_ind_obs = h_obs) data.transform_xt() data.standardize_y() data.create_K_basis(3) data.create_D_basis('linear') print(data) model = setup_model(data) #%% Ragged data and model setup y_obs_ragged = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\ np.array(field_data[6:9,4]),np.array(field_data[9:,4])] h_obs_ragged = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\ np.array(field_data[6:9,3]),np.array(field_data[9:,3])]# observed heights #y_obs = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\ # np.array(field_data[[7,9,11],4]),np.array(field_data[12:,4])] #h_obs = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\ # np.array(field_data[[7,9,11],3]),np.array(field_data[12:,3])]# observed heights print(y_obs) print(h_obs)
def test_predict_univ_sim_only(self):
    """
    Compares wPred output for a univariate sim-only model against values
    stored in the Matlab result file data/univ_sim_only_mcmc_test.mat.

    If the file is missing it is generated by running the Matlab script
    univ_sim_only_mcmc_test through matlab.engine; any failure there is
    printed and re-raised.
    """
    np.random.seed(42)

    show_figs = True

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/univ_sim_only_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from univ_sim_only_mcmc_test.mat \n')
    else:
        print('Generating matfile univ_sim_only_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        eng = None
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.univ_sim_only_mcmc_test(nargout=0)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Nothing to compare against without the file; surface the real error
            raise
        finally:
            if eng is not None:
                eng.quit()
    # Load via the path variable (the old code passed the literal string 'mat_fn')
    matfile = scipy.io.loadmat(mat_fn)

    y = matfile['y']
    x = matfile['x']
    t = matfile['t']

    data = SepiaData(x_sim=x, t_sim=t, y_sim=y)
    data.standardize_y()
    data.transform_xt()
    print(data)

    model = setup_model(data)

    # loadmat returns arrays; squeeze before scalar conversion, since
    # converting arrays with ndim > 0 to Python scalars is deprecated in NumPy
    nsamp = int(np.squeeze(matfile['nsamp']))
    nburn = int(np.squeeze(matfile['nburn']))

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()

    print('Python mcmc time %0.3g s' % (t_end - t_start))
    print('Matlab mcmc time %0.3g s' % float(np.squeeze(matfile['mcmc_time'])))

    # Re-seed so the prediction draws start from a known random state
    np.random.seed(42)
    psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
    pred = wPred([0.5, 0.5], psamps, model.num, model.data, returnMuSigma=True)

    print('Samples are:')
    print(pred.w.squeeze())
    print('Matlab Samples are:')
    print(matfile['pred_w'].squeeze())

    print('Mu are:')
    print(pred.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred_Myhat'].squeeze())

    print('Sigma are:')
    print(pred.sigma.squeeze())
    print('Matlab Sigma are:')
    print(matfile['pred_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred_Syhat'].squeeze(), pred.sigma.squeeze()))

    # Prediction with multiple realizations
    np.random.seed(42)
    sampleset = np.arange(100, 1001, 100) - 1
    samples = model.get_samples(sampleset=sampleset)
    nq = 10
    t_grid = np.linspace(0, 1, nq)
    xpred = np.column_stack((np.ones((nq, 1)) * 0.5, t_grid))
    pred_plot = wPred(xpred, samples, model.num, model.data)

    print('pred_plot_w are:')
    print(pred_plot.w.squeeze()[0, :])
    print('Matlab pred_plot_w are:')
    print(matfile['pred_plot_w'].squeeze()[0, :])

    print('Checking predicted realizations for plotting...')
    # Apparently numerics come into play here, need to turn down the rtol on 'close'
    self.assertTrue(np.allclose(matfile['pred_plot_w'].squeeze(), pred_plot.w.squeeze(), rtol=1e-3))

    print('Done.')

    if show_figs:
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(data.sim_data.t_trans, data.sim_data.y_std)
        plt.plot(np.tile(t_grid, (len(sampleset), 1)), np.squeeze(pred_plot.w), '.')
        plt.show()
def test_predict_uv_from_multi_obs(self):
    """
    Compares uvPred output for a multivariate sim-and-obs model against
    values stored in the Matlab result file
    data/multi_sim_and_obs_mcmc_test.mat.

    If the file is missing it is generated by running the Matlab script
    multi_sim_and_obs_mcmc_test through matlab.engine; any failure there is
    printed and re-raised.
    """
    n_pc = 2
    seed = 42.
    lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
    # Only used when the Matlab file has to be generated; both are replaced
    # by the values stored in the file once it is loaded
    nsamp = 100
    nburn = 0

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/multi_sim_and_obs_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from multi_sim_and_obs_mcmc_test.mat \n')
    else:
        print('Generating matfile multi_sim_and_obs_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        eng = None
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.multi_sim_and_obs_mcmc_test(nsamp, nburn, seed, lamWOs_init, n_pc, 0, nargout=0)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Nothing to compare against without the file; surface the real error
            raise
        finally:
            if eng is not None:
                eng.quit()
    matfile = scipy.io.loadmat(mat_fn)

    # loadmat returns arrays; squeeze before scalar conversion, since
    # converting arrays with ndim > 0 to Python scalars is deprecated in NumPy
    nburn = int(np.squeeze(matfile['nburn']))
    nsamp = int(np.squeeze(matfile['nsamp']))

    y_sim = matfile['y'].T
    y_ind_sim = matfile['y_ind'].squeeze()
    xt_sim = matfile['x']
    y_obs = matfile['y_obs']
    y_ind_obs = matfile['y_ind_obs'].squeeze()
    x_obs = matfile['x_obs']
    # Column 0 of the Matlab x matrix is used as x, column 1 as t
    data = SepiaData(x_sim=xt_sim[:, 0][:, None], t_sim=xt_sim[:, 1][:, None], y_sim=y_sim, y_ind_sim=y_ind_sim,
                     x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=n_pc)
    # NOTE(review): other callers pass this matrix as D_obs=...; confirm which
    # keyword the installed create_D_basis expects
    data.create_D_basis(D=matfile['Dobs'].T)
    print(data)

    np.random.seed(int(seed))
    model = setup_model(data)
    if lamWOs_init > 0:
        model.params.lamWOs.val = np.array([[lamWOs_init]])

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()

    print('Python mcmc time %0.3g s' % (t_end - t_start))
    print('Matlab mcmc time %0.3g s' % float(np.squeeze(matfile['mcmc_time'])))

    # Re-seed so the prediction draws start from a known random state
    np.random.seed(int(seed))
    psamps = model.get_samples(0, sampleset=[0, 4], flat=True)
    pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True)

    # Reshape/transpose sigma so it can be compared with the Matlab layout
    sigma_for_matlab = pred.sigma.squeeze().reshape(14, 7).T

    print('Samples of u are:')
    print(pred.u.squeeze())
    print('Matlab Samples of u are:')
    print(matfile['pred2_u'].squeeze())

    print('Samples of v are:')
    print(pred.v.squeeze())
    print('Matlab Samples of v are:')
    print(matfile['pred2_v'].squeeze())

    print('Mu are:')
    print(pred.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred2_Myhat'])

    print('Sigma are:')
    print(sigma_for_matlab)
    print('Matlab Sigma are:')
    print(matfile['pred2_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred2_u'].squeeze(), pred.u.squeeze()))
    self.assertTrue(np.allclose(matfile['pred2_v'].squeeze(), pred.v.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred2_Myhat'].squeeze(), pred.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred2_Syhat'].squeeze(), sigma_for_matlab))

    print('Done.')
def test_multivariate_sim_and_obs_lamVzGroups_setup(self):
    """
    Tests setup for multivariate sim and obs model with D and lamVzGroups

    Uses a three-row custom D basis with lamVzGroup [0, 1, 1] and checks the
    model dimensions and the shape, prior and step type of every sampled
    parameter (betaV and lamVz are expected to have two columns).
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'],
                  y_ind_sim=self.data_dict['y_ind_sim'], y_obs=self.data_dict['y_obs'],
                  y_ind_obs=self.data_dict['y_ind_obs'])
    print('Testing multivariate sim and obs SepiaModelSetup with discrep...', flush=True)
    print(d, flush=True)

    # Do explicit transformation
    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(n_pc=5)
    # Custom D basis rows: ones, y_ind and y_ind squared
    custom_D = np.vstack([np.ones(d.obs_data.y.shape[1]), d.obs_data.y_ind, d.obs_data.y_ind**2])
    d.create_D_basis(D_obs=custom_D)
    lamVzGroup = [0, 1, 1]
    model = setup_model(d, lamVzGroup=lamVzGroup)

    # Check model components are set up as expected.
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertFalse(model.num.scalar_out)
    self.assertFalse(model.num.sim_only)
    self.assertEqual(model.num.m, 100)
    self.assertEqual(model.num.n, 1)
    self.assertEqual(model.num.p, 1)
    self.assertEqual(model.num.q, 3)
    self.assertEqual(model.num.pu, 5)
    self.assertEqual(model.num.pv, 3)

    # Check parameter setup: name -> (val_shape, prior dist, mcmc step type)
    expected = {
        'betaU': ((model.num.q + model.num.p, model.num.pu), 'Beta', 'BetaRho'),
        'betaV': ((1, 2), 'Beta', 'BetaRho'),
        'lamUz': ((1, model.num.pu), 'Gamma', 'PropMH'),
        'lamVz': ((1, 2), 'Gamma', 'PropMH'),
        'lamWOs': ((1, 1), 'Gamma', 'PropMH'),
        'lamWs': ((1, model.num.pu), 'Gamma', 'PropMH'),
        'lamOs': ((1, 1), 'Gamma', 'PropMH'),
        'theta': ((1, model.num.q), 'Normal', 'Uniform'),
    }
    for name, (val_shape, dist, step_type) in expected.items():
        param = getattr(model.params, name)
        self.assertEqual(param.val_shape, val_shape, msg=name)
        self.assertEqual(param.prior.dist, dist, msg=name)
        self.assertEqual(param.mcmc.stepType, step_type, msg=name)

    # theta additionally records its original range
    theta = model.params.theta
    self.assertTrue(np.allclose(theta.orig_range[0], 0))
    self.assertTrue(np.allclose(theta.orig_range[1], 1))

    # The sampled parameters must be exactly the ones checked above
    mcmc_list_names = [p.name for p in model.params.mcmcList]
    self.assertEqual(set(mcmc_list_names), set(expected))
def test_multivariate_sim_and_obs_setup(self):
    """
    Tests setup for multivariate sim and obs model with D

    The model is built twice -- once from untransformed data and once after
    explicit transform/standardize/basis calls -- and both must end up with
    the same transformed data. The model dimensions and the shape, prior and
    step type of every sampled parameter are then checked.
    """
    d = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'],
                  y_ind_sim=self.data_dict['y_ind_sim'], y_obs=self.data_dict['y_obs'],
                  y_ind_obs=self.data_dict['y_ind_obs'])
    print('Testing multivariate sim and obs SepiaModelSetup with discrep...', flush=True)
    print(d, flush=True)

    # Try it without doing standardization/transform/pc basis to be sure it doesn't break
    model_notrans = setup_model(copy.deepcopy(d))

    # Do explicit transformation
    d.transform_xt()
    d.standardize_y()
    d.create_K_basis(n_pc=5)
    # Exercise each way of creating a D basis; the custom one is created last
    d.create_D_basis(type='constant')
    d.create_D_basis(type='linear')
    custom_D = np.vstack([np.ones(d.obs_data.y.shape[1]), d.obs_data.y_ind])
    d.create_D_basis(D_obs=custom_D)
    model = setup_model(d)

    # Check that either way gives same transformation
    for attr in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
        self.assertTrue(
            np.allclose(getattr(model_notrans.data.sim_data, attr),
                        getattr(model.data.sim_data, attr)),
            msg='sim_data.%s differs between the two setups' % attr)
    for attr in ('orig_y_mean', 'orig_y_sd', 'y_std'):
        self.assertTrue(
            np.allclose(getattr(model_notrans.data.obs_data, attr),
                        getattr(model.data.obs_data, attr)),
            msg='obs_data.%s differs between the two setups' % attr)

    # Check model components are set up as expected.
    # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
    self.assertFalse(model.num.scalar_out)
    self.assertFalse(model.num.sim_only)
    self.assertEqual(model.num.m, 100)
    self.assertEqual(model.num.n, 1)
    self.assertEqual(model.num.p, 1)
    self.assertEqual(model.num.q, 3)
    self.assertEqual(model.num.pu, 5)
    self.assertEqual(model.num.pv, 2)
    # TODO compute projections so model.num.w, model.num.u and model.num.v
    # can be checked against the standardized sim/obs data

    # Check parameter setup: name -> (val_shape, prior dist, mcmc step type)
    expected = {
        'betaU': ((model.num.q + model.num.p, model.num.pu), 'Beta', 'BetaRho'),
        'betaV': ((1, 1), 'Beta', 'BetaRho'),
        'lamUz': ((1, model.num.pu), 'Gamma', 'PropMH'),
        'lamVz': ((1, 1), 'Gamma', 'PropMH'),
        'lamWOs': ((1, 1), 'Gamma', 'PropMH'),
        'lamWs': ((1, model.num.pu), 'Gamma', 'PropMH'),
        'lamOs': ((1, 1), 'Gamma', 'PropMH'),
        'theta': ((1, model.num.q), 'Normal', 'Uniform'),
    }
    for name, (val_shape, dist, step_type) in expected.items():
        param = getattr(model.params, name)
        self.assertEqual(param.val_shape, val_shape, msg=name)
        self.assertEqual(param.prior.dist, dist, msg=name)
        self.assertEqual(param.mcmc.stepType, step_type, msg=name)

    # theta additionally records its original range
    theta = model.params.theta
    self.assertTrue(np.allclose(theta.orig_range[0], 0))
    self.assertTrue(np.allclose(theta.orig_range[1], 1))

    # The sampled parameters must be exactly the ones checked above
    mcmc_list_names = [p.name for p in model.params.mcmcList]
    self.assertEqual(set(mcmc_list_names), set(expected))
def test_predict_multi_sim_only(self):
    """
    Compares wPred output (with and without addResidVar) for a multivariate
    sim-only model against values stored in the Matlab result file
    data/multi_sim_only_mcmc_test.mat.

    If the file is missing it is generated by running the Matlab script
    multi_sim_only_mcmc_test through matlab.engine; any failure there is
    printed and re-raised.
    """
    n_pc = 2
    seed = 42
    lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
    list_to_sample = [1, 2, 3, 4]  # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
    nsamp = 100
    nburn = 10

    # Open data from matlab
    script_path = os.path.dirname(os.path.realpath(__file__))
    mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
    if os.path.isfile(mat_fn):
        # if the matlab data is already in place, just load that
        print('Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
    else:
        print('Generating matfile multi_sim_only_mcmc_test.mat \n')
        # Run matlab code, then open data from matlab
        eng = None
        try:
            eng = matlab.engine.start_matlab()
            eng.cd(script_path)
            eng.addpath('matlab/', nargout=0)
            eng.multi_sim_only_mcmc_test(nsamp, nburn, list_to_sample, seed, lamWOs_init, n_pc, nargout=0)
        except Exception as e:
            print(e)
            print('make sure matlab.engine installed')
            # Nothing to compare against without the file; surface the real error
            raise
        finally:
            if eng is not None:
                eng.quit()
    matfile = scipy.io.loadmat(mat_fn)

    y = matfile['y'].T
    y_ind = matfile['y_ind'].T
    x = matfile['x']

    # Column 0 of the Matlab x matrix is used as x, column 1 as t
    data = SepiaData(x_sim=x[:, 0][:, None], t_sim=x[:, 1][:, None], y_sim=y, y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=n_pc)
    print(data)

    np.random.seed(int(seed))
    model = setup_model(data)
    if lamWOs_init > 0:
        model.params.lamWOs.val = np.array([[lamWOs_init]])
    # Restrict/reorder the sampled parameters using the 1-based Matlab indices
    model.params.mcmcList = [model.params.mcmcList[i - 1] for i in list_to_sample]

    t_start = time()
    model.do_mcmc(nburn + nsamp)
    t_end = time()

    print('Python mcmc time %0.3g s' % (t_end - t_start))
    # loadmat returns arrays; squeeze before scalar conversion, since
    # converting arrays with ndim > 0 to Python scalars is deprecated in NumPy
    print('Matlab mcmc time %0.3g s' % float(np.squeeze(matfile['mcmc_time'])))

    # Re-seed so the prediction draws start from a known random state
    np.random.seed(seed)
    psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
    pred = wPred([0.5, 0.5], psamps, model.num, model.data, returnMuSigma=True)

    # Reshape/transpose sigma so it can be compared with the Matlab layout
    sigma_for_matlab = pred.sigma.squeeze().reshape(10, 2).T

    print('Samples are:')
    print(pred.w.squeeze())
    print('Matlab Samples are:')
    print(matfile['pred_w'].squeeze())

    print('Mu are:')
    print(pred.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred_Myhat'])

    print('Sigma are:')
    print(sigma_for_matlab)
    print('Matlab Sigma are:')
    print(matfile['pred_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred_Syhat'].squeeze(), sigma_for_matlab))

    # Same prediction with addResidVar=True
    pred_arv = wPred([0.5, 0.5], psamps, model.num, model.data, addResidVar=True, returnMuSigma=True)
    sigma_arv_for_matlab = pred_arv.sigma.squeeze().reshape(10, 2).T

    print('Add Residual Variance test')
    print('Samples are:')
    print(pred_arv.w.squeeze())
    print('Matlab Samples are:')
    print(matfile['pred_arv_w'].squeeze())

    print('Mu are:')
    print(pred_arv.mu.squeeze())
    print('Matlab Mu are:')
    print(matfile['pred_arv_Myhat'])

    print('Sigma are:')
    print(sigma_arv_for_matlab)
    print('Matlab Sigma are:')
    print(matfile['pred_arv_Syhat'].squeeze())

    print('Checking predicted realizations...')
    self.assertTrue(np.allclose(matfile['pred_arv_w'].squeeze(), pred_arv.w.squeeze()))
    print('Checking predicted means...')
    self.assertTrue(np.allclose(matfile['pred_arv_Myhat'].squeeze(), pred_arv.mu.squeeze()))
    print('Checking predicted sigmas...')
    self.assertTrue(np.allclose(matfile['pred_arv_Syhat'].squeeze(), sigma_arv_for_matlab))

    print('Done.')
# Observation index and inputs read from the loaded Matlab file
y_ind_obs = matfile['y_ind_obs'].squeeze()
x_obs = matfile['x_obs']
# Column 0 of xt_sim is used as x, column 1 as t
data = SepiaData(x_sim=xt_sim[:, 0][:, None], t_sim=xt_sim[:, 1][:, None], y_sim=y_sim, y_ind_sim=y_ind_sim,
                 x_obs=x_obs, y_obs=y_obs, y_ind_obs=y_ind_obs)
data.standardize_y()
data.transform_xt()
data.create_K_basis(n_pc=2)
# D basis is taken from the Matlab file (transposed)
data.create_D_basis(D_obs=matfile['Dobs'].T)
print(data)
model = setup_model(data)
# Sample counts come from the Matlab file so both runs do the same work
nsamp = int(matfile['nsamp'])
nburn = int(matfile['nburn'])


@timeit
def run_mcmc():
    # Run burn-in plus sampling in a single call; wrapped by the timeit decorator
    model.do_mcmc(nburn + nsamp)


print('Python mcmc time:')
run_mcmc()
# Optional profiling of the same run:
# import cProfile
# cProfile.run('run_mcmc()', 'mcmc.profile')
# The same index array y_ind is used for both the sim and the obs outputs
data = SepiaData(t_sim=design, y_sim=y_sim, y_ind_sim=y_ind, y_obs=y_obs, y_ind_obs=y_ind)
data.standardize_y()
data.transform_xt()
data.create_K_basis(n_features - 1)
print(data)

# Setup model
# We have a known observation error
# Sigy is a diagonal matrix: 0.01 * y_obs divided by the squared sim output sd
Sigy = np.diag(np.squeeze((0.01 * np.ones(n_features) * y_obs) / data.sim_data.orig_y_sd**2))
model = setup_model(data, Sigy)

# Modify priors to match Matlab
model.params.lamWs.prior.bounds[1] = np.inf
model.params.lamWs.prior.params = [np.ones((1, 11)), np.zeros((1, 11))]

# Do mcmc
model.tune_step_sizes(100, 25)
model.do_mcmc(10000)
samples_dict = model.get_samples()

# Save the samples and the model itself as pickle files
with open('data/sepia_mcmc_samples1-5000.pkl', 'wb') as f:
    pickle.dump(samples_dict, f)

with open('data/sepia_model.pkl', 'wb') as f:
    pickle.dump(model, f)