Beispiel #1
0
    def test_univariate_sim_only_setup(self):
        """
        Tests setup for univariate sim only model.

        The model is built twice from the same t_sim/y_sim data: once from a
        deep copy on which transform_xt()/standardize_y() were never called,
        and once after calling them explicitly. The test checks that both
        models hold matching transformed data, then checks the model
        dimensions and the prior / MCMC step type of each sampled parameter.
        """

        d = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'])
        print('Testing univariate sim-only SepiaModelSetup...', flush=True)
        print(d, flush=True)

        # Try it without doing standardization/transform to be sure it doesn't break
        model_notrans = setup_model(copy.deepcopy(d))

        # Do explicit transformation
        d.transform_xt()
        d.standardize_y()
        model = setup_model(d)

        # Check that either way gives same transformation
        sim_notrans = model_notrans.data.sim_data
        sim = model.data.sim_data
        for attr in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
            self.assertTrue(np.allclose(getattr(sim_notrans, attr), getattr(sim, attr)), msg=attr)

        # Check model components are set up as expected.
        # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
        self.assertTrue(model.num.scalar_out)
        self.assertTrue(model.num.sim_only)
        expected_dims = {'m': 100, 'n': 0, 'p': 1, 'q': 1, 'pu': 1, 'pv': 0}
        for dim_name, expected in expected_dims.items():
            self.assertEqual(getattr(model.num, dim_name), expected, msg=dim_name)
        self.assertTrue(np.allclose(model.num.w, model.data.sim_data.y_std))

        # Check parameter setup -- betaU
        betaU = model.params.betaU
        self.assertEqual(betaU.val_shape, (2, 1))
        self.assertEqual(betaU.prior.dist, 'Beta')
        self.assertEqual(betaU.mcmc.stepType, 'BetaRho')

        # Check parameter setup -- lamUz, lamWOs, lamWs (all expected to look the same)
        for lam_name in ('lamUz', 'lamWOs', 'lamWs'):
            lam = getattr(model.params, lam_name)
            self.assertEqual(lam.val_shape, (1, 1), msg=lam_name)
            self.assertEqual(lam.prior.dist, 'Gamma', msg=lam_name)
            self.assertEqual(lam.mcmc.stepType, 'PropMH', msg=lam_name)

        # The MCMC list must contain exactly these parameters (order is not checked)
        mcmc_list_names = [p.name for p in model.params.mcmcList]
        self.assertEqual(set(mcmc_list_names), {'betaU', 'lamUz', 'lamWOs', 'lamWs'})
def setup_univ_sim_and_obs(m=100, n=50, seed=42., n_lik=0, n_mcmc=0, n_pred=0):
    """
    Run the Matlab ``setup_univ_sim_and_obs`` routine and build the matching Python model.

    Starts a Matlab engine, changes into ``root_path``, adds ``matlab/`` to the
    Matlab path and calls the Matlab function of the same name. The fields
    'y', 'xt', 'y_obs' and 'x_obs' of its result are converted to float arrays
    and used to build a SepiaData object (column 0 of 'xt' as x_sim, column 1
    as t_sim), which is standardized, transformed and passed to setup_model.

    All arguments are forwarded unchanged to the Matlab function; ``n`` is
    additionally used as the number of rows when reshaping 'x_obs' to (n, 1).

    Returns a tuple ``(model, res)`` where ``res`` is the raw Matlab result.

    Raises whatever exception the Matlab engine raised (after printing it);
    previously the error was swallowed and the function then failed with an
    unrelated UnboundLocalError on ``res``.
    """
    try:
        eng = matlab.engine.start_matlab()
        try:
            eng.cd(root_path)
            eng.addpath('matlab/', nargout=0)
            res = eng.setup_univ_sim_and_obs(m,
                                             n,
                                             seed,
                                             n_lik,
                                             n_mcmc,
                                             n_pred,
                                             nargout=1)
        finally:
            # Shut the engine down even when the Matlab call fails
            eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # No result means nothing to build a model from: propagate the real error
        raise
    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    x_obs = np.array(res['x_obs'], dtype=float).reshape((n, 1))
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1][:, None],
                     y_sim=y,
                     x_obs=x_obs,
                     y_obs=y_obs)
    data.standardize_y()
    data.transform_xt()
    print(data)
    model = setup_model(data)
    return model, res
def setup_multi_sim_and_obs_sharedtheta(m=100,
                                        n=10,
                                        nt_sim=20,
                                        nt_obs=15,
                                        noise_sd=0.1,
                                        nx=5,
                                        n_pc=10,
                                        seed=42.,
                                        n_lik=0,
                                        n_mcmc=0,
                                        n_pred=0,
                                        n_shared=2,
                                        clist=[],
                                        fix_K=False):
    """
    Run the Matlab ``setup_multi_sim_and_obs_sharedtheta`` routine and build one model per shared data set.

    Starts a Matlab engine, changes into ``root_path``, adds ``matlab/`` to the
    Matlab path and calls the Matlab function of the same name. The result
    arrays carry a trailing axis of length ``n_shared``; for each index along
    that axis a SepiaData object is built (column 0 of 'xt' as x_sim, the
    remaining columns as t_sim), standardized, transformed, given a K basis
    via ``create_K_basis(n_pc)`` and passed to setup_model.

    All arguments except ``fix_K`` are forwarded to the Matlab function
    (``clist`` wrapped in ``matlab.double``). ``clist`` is only read, never
    mutated, so the shared list default is harmless here. ``fix_K`` is
    accepted but not used by this function.

    Returns a tuple ``(model_list, res)`` where ``res`` is the raw Matlab result.

    Raises whatever exception the Matlab engine raised (after printing it);
    previously the error was swallowed and the function then failed with an
    unrelated UnboundLocalError on ``res``.
    """
    try:
        eng = matlab.engine.start_matlab()
        try:
            eng.cd(root_path)
            eng.addpath('matlab/', nargout=0)
            res = eng.setup_multi_sim_and_obs_sharedtheta(m,
                                                          n,
                                                          nt_sim,
                                                          nt_obs,
                                                          noise_sd,
                                                          nx,
                                                          n_pc,
                                                          seed,
                                                          n_lik,
                                                          n_mcmc,
                                                          n_pred,
                                                          n_shared,
                                                          matlab.double(clist),
                                                          nargout=1)
        finally:
            # Shut the engine down even when the Matlab call fails
            eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # No result means nothing to build models from: propagate the real error
        raise
    y = np.array(res['y'], dtype=float)  # (m, nt_sim, n_shared)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()  # (nt_sim, n_shared)
    xt = np.array(res['xt'], dtype=float)  # (m, nx, n_shared)
    y_obs = np.array(res['y_obs'], dtype=float)  # (n, nt_sim, n_shared)
    y_ind_obs = np.array(res['y_ind_obs'],
                         dtype=float).squeeze()  # (nt_obs, n_shared)
    x_obs = np.array(res['x_obs'], dtype=float)  # (n, 1, n_shared)
    model_list = []
    # One independent data object / model per slice of the trailing axis
    for i in range(n_shared):
        data = SepiaData(x_sim=xt[:, 0, i][:, None],
                         t_sim=xt[:, 1:, i],
                         y_sim=y[:, :, i],
                         y_ind_sim=y_ind[:, i],
                         x_obs=x_obs[:, :, i],
                         y_obs=y_obs[:, :, i],
                         y_ind_obs=y_ind_obs[:, i])
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc)
        model = setup_model(data)
        model_list.append(model)
    return model_list, res
    def setUp(self,
              m=100,
              n=1,
              nt_sim=50,
              nt_obs=20,
              n_theta=3,
              n_basis=5,
              sig_n=0.1,
              seed=42):
        """
        Build lists of univariate and multivariate models as test fixtures.

        Generates data three times (each time with the same arguments and the
        same seed), wraps each result in a SepiaData object, prepares it, and
        stores the models produced by setup_model in ``self.univ_model_list``
        and ``self.multi_model_list``. Also stores ``self.hier_idx``.
        """
        n_hier = 3
        self.hier_idx = np.array([[0, 0, 0]])
        #self.hier_idx = np.array([[1, 1, 1], [2, -1, 2]]) # TODO this fails for multivariate; cant use for univariate now
        univ_data_list, multi_data_list = [], []
        multi_gen_kwargs = dict(m=m,
                                n=n,
                                nt_sim=nt_sim,
                                nt_obs=nt_obs,
                                n_theta=n_theta,
                                n_basis=n_basis,
                                sig_n=sig_n,
                                seed=seed)
        univ_gen_kwargs = dict(m=m, n=n, sig_n=sig_n, seed=seed)
        for _ in range(n_hier):
            multi_data_dict = generate_data.generate_multi_sim_and_obs(**multi_gen_kwargs)
            univ_data_dict = generate_data.generate_univ_sim_and_obs(**univ_gen_kwargs)

            # Univariate data: sim inputs/outputs plus observed outputs
            univ = SepiaData(t_sim=univ_data_dict['t_sim'],
                             y_sim=univ_data_dict['y_sim'],
                             y_obs=univ_data_dict['y_obs'])
            univ.transform_xt()
            univ.standardize_y()
            univ_data_list.append(univ)

            # Multivariate data additionally carries output indices and needs bases
            multi = SepiaData(t_sim=multi_data_dict['t_sim'],
                              y_sim=multi_data_dict['y_sim'],
                              y_ind_sim=multi_data_dict['y_ind_sim'],
                              y_obs=multi_data_dict['y_obs'],
                              y_ind_obs=multi_data_dict['y_ind_obs'])
            multi.transform_xt()
            multi.standardize_y()
            multi.create_K_basis(5)
            multi.create_D_basis('constant')
            multi_data_list.append(multi)

        univ_models = []
        for univ in univ_data_list:
            univ_models.append(setup_model(univ))
        self.univ_model_list = univ_models

        multi_models = []
        for multi in multi_data_list:
            multi_models.append(setup_model(multi))
        self.multi_model_list = multi_models
def setup_multi_sim_and_obs(m=100,
                            n=10,
                            nt_sim=20,
                            nt_obs=15,
                            noise_sd=0.1,
                            nx=5,
                            n_pc=10,
                            seed=42.,
                            n_lik=0,
                            n_mcmc=0,
                            n_pred=0,
                            fix_K=False):
    """
    Run the Matlab ``setup_multi_sim_and_obs`` routine and build the matching Python model.

    Starts a Matlab engine, changes into ``root_path``, adds ``matlab/`` to the
    Matlab path and calls the Matlab function of the same name. The result is
    converted to float arrays and used to build a SepiaData object (column 0
    of 'xt' as x_sim, the remaining columns as t_sim), which is standardized,
    transformed, given a K basis and a 'constant' D basis, and passed to
    setup_model.

    All arguments except ``fix_K`` are forwarded unchanged to the Matlab
    function. When ``fix_K`` is true, the transposed 'K' field of the Matlab
    result is handed to ``create_K_basis`` instead of letting it compute one.

    Returns a tuple ``(model, res)`` where ``res`` is the raw Matlab result.

    Raises whatever exception the Matlab engine raised (after printing it);
    previously the error was swallowed and the function then failed with an
    unrelated UnboundLocalError on ``res``.
    """
    try:
        eng = matlab.engine.start_matlab()
        try:
            eng.cd(root_path)
            eng.addpath('matlab/', nargout=0)
            res = eng.setup_multi_sim_and_obs(m,
                                              n,
                                              nt_sim,
                                              nt_obs,
                                              noise_sd,
                                              nx,
                                              n_pc,
                                              seed,
                                              n_lik,
                                              n_mcmc,
                                              n_pred,
                                              nargout=1)
        finally:
            # Shut the engine down even when the Matlab call fails
            eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # No result means nothing to build a model from: propagate the real error
        raise
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    if fix_K:  # means use the K from matlab - avoid issues with positive/negative component ambiguity
        data.create_K_basis(n_pc, K=np.array(res['K']).T)
    else:
        data.create_K_basis(n_pc)
    data.create_D_basis('constant')
    print(data)
    model = setup_model(data)
    return model, res
Beispiel #6
0
def create_test_case():
    """
    Generate a small random multivariate data set and save it for cross-checking.

    Draws random simulation and observation outputs, builds a SepiaData object
    with a 3-component K basis and a 'constant' D basis, writes the raw and
    derived arrays to 'data/test_case_matlab.mat', then builds the model with
    setup_model and pickles model and data to
    'data/test_case_python_model.pkl'. The random generator is not seeded here.
    """
    n_obs, n_sim = 2, 5
    p, q = 3, 4
    ell_sim, ell_obs = 80, 20

    # --- simulation side (random draws happen in this order: t, then three coefficient vectors)
    t = np.random.uniform(0, 1, (n_sim, q))
    x = 0.5 * np.ones((n_sim, p))
    y_ind = np.linspace(0, 100, ell_sim)
    sim_quad = np.random.normal(0, 1, (n_sim, 1))
    sim_lin = np.random.normal(0, 1, (n_sim, 1))
    sim_const = np.random.normal(0, 1, (n_sim, 1))
    y = (10 * sim_quad * (y_ind[None, :] - 50)**2 / 75.
         + 20 * sim_lin * y_ind[None, :]
         + 20 * sim_const)

    # --- observation side, same functional form on a shorter index range
    x_obs = 0.5 * np.ones((n_obs, p))
    y_obs_ind = np.linspace(10, 85, ell_obs)
    obs_quad = np.random.normal(0, 1, (n_obs, 1))
    obs_lin = np.random.normal(0, 1, (n_obs, 1))
    obs_const = np.random.normal(0, 1, (n_obs, 1))
    y_obs = (10 * obs_quad * (y_obs_ind[None, :] - 50)**2 / 75.
             + 20 * obs_lin * y_obs_ind[None, :]
             + 20 * obs_const)

    data = SepiaData(x_sim=x,
                     t_sim=t,
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_obs_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=3)
    data.create_D_basis('constant')

    # Save as matfile for testing in matlab
    mat_contents = dict()
    mat_contents['t'] = t
    mat_contents['y'] = y
    mat_contents['y_obs'] = y_obs
    mat_contents['D'] = data.obs_data.D
    mat_contents['Kobs'] = data.obs_data.K
    mat_contents['Ksim'] = data.sim_data.K
    mat_contents['y_obs_std'] = data.obs_data.y_std
    mat_contents['y_sim_std'] = data.sim_data.y_std
    mat_contents['y_sd'] = data.sim_data.orig_y_sd
    scipy.io.savemat('data/test_case_matlab.mat', mat_contents)

    g = setup_model(data)

    # Save pickle file of results
    with open('data/test_case_python_model.pkl', 'wb') as f:
        pickle.dump({'model': g, 'data': data}, f)
    def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
        """
        Build one model of each supported flavour as test fixtures.

        Generates one multivariate and one univariate data set and stores five
        models on the instance: univariate sim-only, univariate sim+obs,
        multivariate sim-only, multivariate sim+obs without a D basis, and
        multivariate sim+obs with a 'linear' D basis.
        """
        multi_data_dict = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs,
            n_theta=n_theta, n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ_data_dict = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

        # Univariate, simulations only
        univ_sim = SepiaData(t_sim=univ_data_dict['t_sim'],
                             y_sim=univ_data_dict['y_sim'])
        univ_sim.transform_xt()
        univ_sim.standardize_y()
        self.univ_sim_only_model = setup_model(univ_sim)

        # Univariate, simulations and observations
        univ_both = SepiaData(t_sim=univ_data_dict['t_sim'],
                              y_sim=univ_data_dict['y_sim'],
                              y_obs=univ_data_dict['y_obs'])
        univ_both.transform_xt()
        univ_both.standardize_y()
        self.univ_sim_and_obs_model = setup_model(univ_both)

        # Multivariate, simulations only
        multi_sim = SepiaData(t_sim=multi_data_dict['t_sim'],
                              y_sim=multi_data_dict['y_sim'],
                              y_ind_sim=multi_data_dict['y_ind_sim'])
        multi_sim.transform_xt()
        multi_sim.standardize_y()
        multi_sim.create_K_basis(5)
        self.multi_sim_only_model = setup_model(multi_sim)

        # Multivariate, simulations and observations, no D basis
        multi_no_d = SepiaData(t_sim=multi_data_dict['t_sim'],
                               y_sim=multi_data_dict['y_sim'],
                               y_ind_sim=multi_data_dict['y_ind_sim'],
                               y_obs=multi_data_dict['y_obs'],
                               y_ind_obs=multi_data_dict['y_ind_obs'])
        multi_no_d.transform_xt()
        multi_no_d.standardize_y()
        multi_no_d.create_K_basis(5)
        self.multi_sim_and_obs_noD_model = setup_model(multi_no_d)

        # Multivariate, simulations and observations, 'linear' D basis
        multi_with_d = SepiaData(t_sim=multi_data_dict['t_sim'],
                                 y_sim=multi_data_dict['y_sim'],
                                 y_ind_sim=multi_data_dict['y_ind_sim'],
                                 y_obs=multi_data_dict['y_obs'],
                                 y_ind_obs=multi_data_dict['y_ind_obs'])
        multi_with_d.transform_xt()
        multi_with_d.standardize_y()
        multi_with_d.create_K_basis(5)
        multi_with_d.create_D_basis('linear')
        self.multi_sim_and_obs_model = setup_model(multi_with_d)
    def test_multivariate_sim_and_obs_ragged_setup(self):
        """
        Smoke test: setup_model accepts observations of differing lengths.

        Simulated outputs share one index grid; each of the n observations
        gets its own randomly sized, jittered index grid, so y_obs and
        y_ind_obs are lists of arrays with different lengths. The test only
        checks that building the model does not raise.
        """
        m = 700  # number of simulated observations
        p = 3  # dimension of x (simulation inputs)
        ell_sim = 1000  # dimension of y output sim
        pu = 3  # number of PCs
        q = 2  # dimension of t (extra sim inputs)
        n = 5  # number of observed observations

        def true_basis(idx):
            # Three basis curves evaluated at the given output indices, one per row
            return np.vstack([
                0.5 * (np.sin(idx) + 1),
                np.square(-idx + 50) / 2500,
                idx / 100,
            ])

        # Per-component scaling applied to the random basis weights
        weight_scale = 2 * np.array([1, 0.5, 0.2])[:, None]

        ell_obs = np.random.randint(100, 600, n)

        # Simulation side
        y_ind_sim = np.linspace(0, 100, ell_sim)
        K_true_sim = true_basis(y_ind_sim)
        sim_weights = weight_scale * np.random.normal(0, 1, (pu, m))
        y_sim = np.transpose(
            np.log(1 + y_ind_sim)[:, None] + np.dot(K_true_sim.T, sim_weights))
        x_sim = 0.5 * np.random.uniform(-1, 3, (m, p))
        t = np.random.uniform(-10, 10, (m, q))

        # Observation side: one jittered index grid per observation, clamped at zero
        y_ind_obs = []
        for n_pts in ell_obs:
            y_ind_obs.append(
                np.linspace(0, 100, n_pts) + np.random.uniform(-3, 3, n_pts))
        for grid in y_ind_obs:
            np.maximum(grid, 0, out=grid)
        K_true_obs = [true_basis(grid) for grid in y_ind_obs]
        y_obs = []
        for grid, basis in zip(y_ind_obs, K_true_obs):
            obs_weights = weight_scale * np.random.normal(0, 1, (pu, 1))
            y_obs.append(10 + np.squeeze(
                np.log(1 + grid)[:, None] + np.dot(basis.T, obs_weights)))
        x_obs = 0.5 * np.random.uniform(-1, 3, (n, p))

        d = SepiaData(x_sim=x_sim,
                      y_sim=y_sim,
                      t_sim=t,
                      y_ind_sim=y_ind_sim,
                      x_obs=x_obs,
                      y_obs=y_obs,
                      y_ind_obs=y_ind_obs)

        model = setup_model(d)
def setup_multi_sim_and_obs_noD(m=100,
                                n=10,
                                nt_sim=20,
                                nt_obs=15,
                                noise_sd=0.1,
                                nx=5,
                                n_pc=10,
                                seed=42.,
                                n_lik=0,
                                n_mcmc=0):
    """
    Run the Matlab ``setup_multi_sim_and_obs_noD`` routine and build the matching Python model.

    Starts a Matlab engine, changes into ``root_path``, adds ``matlab/`` to the
    Matlab path and calls the Matlab function of the same name. The result is
    converted to float arrays and used to build a SepiaData object (column 0
    of 'xt' as x_sim, the remaining columns as t_sim), which is standardized,
    transformed, given a K basis via ``create_K_basis(n_pc)`` (no D basis is
    created) and passed to setup_model.

    All arguments are forwarded unchanged to the Matlab function.

    Returns a tuple ``(model, res)`` where ``res`` is the raw Matlab result.

    Raises whatever exception the Matlab engine raised (after printing it);
    previously the error was swallowed and the function then failed with an
    unrelated UnboundLocalError on ``res``.
    """
    try:
        eng = matlab.engine.start_matlab()
        try:
            eng.cd(root_path)
            eng.addpath('matlab/', nargout=0)
            res = eng.setup_multi_sim_and_obs_noD(m,
                                                  n,
                                                  nt_sim,
                                                  nt_obs,
                                                  noise_sd,
                                                  nx,
                                                  n_pc,
                                                  seed,
                                                  n_lik,
                                                  n_mcmc,
                                                  nargout=1)
        finally:
            # Shut the engine down even when the Matlab call fails
            eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # No result means nothing to build a model from: propagate the real error
        raise
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    print(data)
    model = setup_model(data)
    return model, res
Beispiel #10
0
    def test_univariate_sim_only_lik(self):
        """
        Tests log lik for univariate sim only model.

        Evaluates the full log likelihood once, then re-evaluates it for every
        element of every parameter in the model's MCMC list. Passes as long as
        no call raises.
        """
        sim_inputs = self.univ_data_dict['t_sim']
        sim_outputs = self.univ_data_dict['y_sim']
        data = SepiaData(t_sim=sim_inputs, y_sim=sim_outputs)
        print('Testing univariate sim-only SepiaLogLik...', flush=True)
        print(data, flush=True)

        data.transform_xt()
        data.standardize_y()
        model = setup_model(data)

        # Full evaluation first
        model.logLik()

        # Then one evaluation per (parameter, flat element index)
        for param in model.params.mcmcList:
            n_elements = int(np.prod(param.val_shape))
            cindex = 0
            while cindex < n_elements:
                model.logLik(cvar=param.name, cindex=cindex)
                cindex += 1
def setup_multi_sim_only(m=300,
                         nt=20,
                         nx=5,
                         n_pc=10,
                         seed=42.,
                         n_lik=0,
                         n_mcmc=0,
                         n_pred=0,
                         fix_K=False):
    """
    Run the Matlab ``setup_multi_sim_only`` routine and build the matching Python model.

    Starts a Matlab engine, changes into ``root_path``, adds ``matlab/`` to the
    Matlab path and calls the Matlab function of the same name. The result is
    converted to float arrays and used to build a simulation-only SepiaData
    object (column 0 of 'xt' as x_sim, the remaining columns as t_sim), which
    is standardized, transformed, given a K basis via ``create_K_basis(n_pc)``
    and passed to setup_model.

    All arguments except ``fix_K`` are forwarded unchanged to the Matlab
    function. When ``fix_K`` is true, the computed simulation K basis is
    overwritten with the transposed 'K' field of the Matlab result.

    Returns a tuple ``(model, res)`` where ``res`` is the raw Matlab result.

    Raises whatever exception the Matlab engine raised (after printing it);
    previously the error was swallowed and the function then failed with an
    unrelated UnboundLocalError on ``res``.
    """
    try:
        eng = matlab.engine.start_matlab()
        try:
            eng.cd(root_path)
            eng.addpath('matlab/', nargout=0)
            res = eng.setup_multi_sim_only(m,
                                           nt,
                                           nx,
                                           n_pc,
                                           seed,
                                           n_lik,
                                           n_mcmc,
                                           n_pred,
                                           nargout=1)
        finally:
            # Shut the engine down even when the Matlab call fails
            eng.quit()
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # No result means nothing to build a model from: propagate the real error
        raise
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    if fix_K:
        # Replace the Python-computed basis with the one Matlab used
        data.sim_data.K = np.array(res['K']).T
    print(data)
    model = setup_model(data)
    return model, res
Beispiel #12
0
    def test_multivariate_sim_and_obs_noD_lik(self):
        """
        Tests log lik for multivariate sim and obs model no discrep.

        Builds the data with a K basis but without a D basis, evaluates the
        full log likelihood once, then re-evaluates it for every element of
        every parameter in the model's MCMC list. Passes as long as no call
        raises.
        """

        d = SepiaData(t_sim=self.multi_data_dict['t_sim'],
                      y_sim=self.multi_data_dict['y_sim'],
                      y_ind_sim=self.multi_data_dict['y_ind_sim'],
                      y_obs=self.multi_data_dict['y_obs'],
                      y_ind_obs=self.multi_data_dict['y_ind_obs'])
        # The message previously said "sim-only", which did not match this sim-and-obs test
        print('Testing multivariate sim and obs (no discrepancy) SepiaLogLik...', flush=True)
        print(d, flush=True)

        d.transform_xt()
        d.standardize_y()
        d.create_K_basis(5)
        model = setup_model(d)

        model.logLik()

        # One evaluation per (parameter, flat element index)
        for param in model.params.mcmcList:
            for cindex in range(int(np.prod(param.val_shape))):
                model.logLik(cvar=param.name, cindex=cindex)
Beispiel #13
0
# Label the axes of the current figure and display it.
# NOTE(review): the plotting calls that created this figure are outside this excerpt.
plt.xlabel('y index')
plt.ylabel('y')
plt.show()

#%%

# Prepare the data object: transform_xt, standardize_y with the 'columnwise'
# option, a K basis (argument 5) and a 'linear' D basis.
data.transform_xt()
data.standardize_y('columnwise')
data.create_K_basis(5)
data.create_D_basis(type='linear')

print(data)

#%%

# Build the model from the prepared data.
model = setup_model(data)

#%%

# Optional on-disk cache of the fitted model.
cachefile_name = 'multivariate_example_with_prediction.pkl'
import os.path
import pickle

# Caching is switched off here, so the else branch below always runs.
use_save_file = False

if use_save_file and os.path.isfile(cachefile_name):
    # NOTE(review): the file object passed to pickle.load is never explicitly
    # closed; also, unpickling should only ever be done on trusted files.
    model = pickle.load(open(cachefile_name, "rb"))
else:
    # No usable cache: tune step sizes, then run MCMC.
    model.tune_step_sizes(50, 20)
    model.do_mcmc(1000)
    # NOTE(review): the body of the following `if` is missing from this excerpt.
    if use_save_file:
# Split the columns of sim_data into the x and t simulation inputs.
# NOTE(review): sim_data, datadir, field_data, x_obs, y_obs and h_obs are
# defined outside this excerpt.
x_sim = sim_data[:,0:2] # x = {R, rho_ball}
t_sim = sim_data[:,2:4] # t = {C, g}
# Load the simulation output indices (used below as y_ind_sim).
with open(datadir+'simHeights101x1','r') as f:
    h_sim = np.loadtxt(f)
# Load the simulation outputs; the loaded matrix is transposed before use.
with open(datadir+'sims101x80Cg.txt','r') as f:
    y_sim = np.loadtxt(f).T

# create sepia data object
data = SepiaData(x_sim = x_sim, t_sim = t_sim, y_ind_sim = h_sim, y_sim = y_sim,\
                 x_obs = x_obs, y_obs = y_obs, y_ind_obs = h_obs)
data.transform_xt()
data.standardize_y()
data.create_K_basis(3)
data.create_D_basis('linear')
print(data)
model = setup_model(data)

#%% Ragged data and model setup
# Split the field data rows into four groups (rows 0-2, 3-5, 6-8 and 9 onward);
# column 4 supplies the observed values and column 3 the matching heights.
y_obs_ragged = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\
         np.array(field_data[6:9,4]),np.array(field_data[9:,4])]
h_obs_ragged = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\
         np.array(field_data[6:9,3]),np.array(field_data[9:,3])]# observed heights

#y_obs = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\
#         np.array(field_data[[7,9,11],4]),np.array(field_data[12:,4])]
#h_obs = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\
#         np.array(field_data[[7,9,11],3]),np.array(field_data[12:,3])]# observed heights

# NOTE(review): these print y_obs / h_obs, not the *_ragged lists built just
# above -- confirm which was intended.
print(y_obs)
print(h_obs)
Beispiel #15
0
    def test_predict_univ_sim_only(self):
        """
        Compare Python predictions for the univariate sim-only model against stored Matlab results.

        Loads 'data/univ_sim_only_mcmc_test.mat' from next to this file (running
        the Matlab script of the same name to create it when missing), builds
        the model from its 'y', 'x' and 't' fields, runs nburn + nsamp MCMC
        iterations, and checks that wPred reproduces the Matlab predicted
        realizations, means and sigmas stored in the mat file.
        """

        np.random.seed(42)

        show_figs = True

        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/univ_sim_only_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print('Found matfile, loading from univ_sim_only_mcmc_test.mat \n')
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile univ_sim_only_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                try:
                    eng.cd(script_path)
                    eng.addpath('matlab/', nargout=0)
                    eng.univ_sim_only_mcmc_test(nargout=0)
                finally:
                    # Shut the engine down even when the Matlab call fails
                    eng.quit()
                # Load from the path variable (was the literal string 'mat_fn')
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Nothing below can run without the mat file: propagate the real error
                raise

        y = matfile['y']
        x = matfile['x']
        t = matfile['t']
        data = SepiaData(x_sim=x, t_sim=t, y_sim=y)
        data.standardize_y()
        data.transform_xt()
        print(data)
        model = setup_model(data)
        nsamp = int(matfile['nsamp'])
        nburn = int(matfile['nburn'])
        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])
        # Creates dict with each sampled variable name as key, array of samples (nsamp, ...) as value
        samples = model.get_samples(nburn)
        log_post = np.array(model.params.lp.mcmc.draws)

        # Re-seed so the prediction draws are reproducible
        np.random.seed(42)
        psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
        pred = wPred([0.5, 0.5],
                     psamps,
                     model.num,
                     model.data,
                     returnMuSigma=True)
        print('Samples are:')
        print(pred.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_w'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_Myhat'].squeeze())

        print('Sigma are:')
        print(pred.sigma.squeeze())
        print('Matlab Sigma are:')
        print(matfile['pred_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_Syhat'].squeeze(), pred.sigma.squeeze()))

        # Prediction with multiple realizations
        np.random.seed(42)
        sampleset = np.arange(100, 1001, 100) - 1
        samples = model.get_samples(sampleset=sampleset)
        nq = 10
        t = np.linspace(0, 1, nq)
        # First column fixed at 0.5, second column sweeps t over [0, 1]
        xpred = np.column_stack((np.ones((nq, 1)) * 0.5, t))
        pred_plot = wPred(xpred, samples, model.num, model.data)

        print('pred_plot_w are:')
        print(pred_plot.w.squeeze()[0, :])
        print('Matlab pred_plot_w are:')
        print(matfile['pred_plot_w'].squeeze()[0, :])

        print('Checking predicted realizations for plotting...')
        # Apparently numerics come into play here, need to turn down the rtol on 'close'
        self.assertTrue(
            np.allclose(matfile['pred_plot_w'].squeeze(),
                        pred_plot.w.squeeze(),
                        rtol=1e-3))

        print('Done.')

        if show_figs:
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot(data.sim_data.t_trans, data.sim_data.y_std)
            plt.plot(np.tile(t, (len(sampleset), 1)), np.squeeze(pred_plot.w),
                     '.')
            plt.show()
Beispiel #16
0
    def test_predict_uv_from_multi_obs(self):
        """
        Compare Python u/v predictions for the multivariate sim-and-obs model against stored Matlab results.

        Loads 'data/multi_sim_and_obs_mcmc_test.mat' from next to this file
        (running the Matlab script of the same name to create it when missing),
        builds the model with a K basis of n_pc components and the D basis
        taken from the mat file, runs nburn + nsamp MCMC iterations, and checks
        that uvPred reproduces the Matlab predicted u, v, means and sigmas
        stored in the mat file.
        """

        n_pc = 2
        seed = 42.
        lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
        nsamp = 100
        nburn = 0
        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/multi_sim_and_obs_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print(
                'Found matfile, loading from multi_sim_and_obs_mcmc_test.mat \n'
            )
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile multi_sim_and_obs_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                try:
                    eng.cd(script_path)
                    eng.addpath('matlab/', nargout=0)
                    eng.multi_sim_and_obs_mcmc_test(nsamp,
                                                    nburn,
                                                    seed,
                                                    lamWOs_init,
                                                    n_pc,
                                                    0,
                                                    nargout=0)
                finally:
                    # Shut the engine down even when the Matlab call fails
                    eng.quit()
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Nothing below can run without the mat file: propagate the real error
                raise

        # The values stored in the mat file take precedence over the defaults above
        nburn = int(matfile['nburn'])
        nsamp = int(matfile['nsamp'])

        y_sim = matfile['y'].T
        y_ind_sim = matfile['y_ind'].squeeze()
        xt_sim = matfile['x']
        y_obs = matfile['y_obs']
        y_ind_obs = matfile['y_ind_obs'].squeeze()
        x_obs = matfile['x_obs']
        data = SepiaData(x_sim=xt_sim[:, 0][:, None],
                         t_sim=xt_sim[:, 1][:, None],
                         y_sim=y_sim,
                         y_ind_sim=y_ind_sim,
                         x_obs=x_obs,
                         y_obs=y_obs,
                         y_ind_obs=y_ind_obs)
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc=n_pc)
        data.create_D_basis(D=matfile['Dobs'].T)
        print(data)

        np.random.seed(int(seed))
        model = setup_model(data)
        if lamWOs_init > 0:
            model.params.lamWOs.val = np.array([[lamWOs_init]])

        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])

        # Re-seed so the prediction draws are reproducible
        np.random.seed(int(seed))
        psamps = model.get_samples(0, sampleset=[0, 4], flat=True)
        #pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True, useAltW=True)
        pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True)
        print('Samples of u are:')
        print(pred.u.squeeze())
        print('Matlab Samples of u are:')
        print(matfile['pred2_u'].squeeze())

        print('Samples of v are:')
        print(pred.v.squeeze())
        print('Matlab Samples of v are:')
        print(matfile['pred2_v'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred2_Myhat'])

        # Python sigma is reshaped to (14, 7) and transposed to match the Matlab layout
        print('Sigma are:')
        print(pred.sigma.squeeze().reshape(14, 7).T)
        print('Matlab Sigma are:')
        print(matfile['pred2_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred2_u'].squeeze(), pred.u.squeeze()))
        self.assertTrue(
            np.allclose(matfile['pred2_v'].squeeze(), pred.v.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred2_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred2_Syhat'].squeeze(),
                        pred.sigma.squeeze().reshape(14, 7).T))

        print('Done.')
Beispiel #17
0
    def test_multivariate_sim_and_obs_lamVzGroups_setup(self):
        """
        Tests setup for multivariate sim and obs model with D and lamVzGroups.

        Builds a SepiaData object from the ``self.data_dict`` fixture, applies
        the explicit transform / standardize / K-basis steps, attaches a custom
        D basis stacked from ones, ``y_ind`` and ``y_ind**2``, and sets up the
        model with ``lamVzGroup=[0, 1, 1]``.  The model dimensions and each
        parameter's value shape, prior distribution and MCMC step type are
        then checked.
        """

        d = SepiaData(t_sim=self.data_dict['t_sim'],
                      y_sim=self.data_dict['y_sim'],
                      y_ind_sim=self.data_dict['y_ind_sim'],
                      y_obs=self.data_dict['y_obs'],
                      y_ind_obs=self.data_dict['y_ind_obs'])
        print(
            'Testing multivariate sim and obs SepiaModelSetup with discrep...',
            flush=True)
        print(d, flush=True)

        # Do explicit transformation
        d.transform_xt()
        d.standardize_y()
        d.create_K_basis(n_pc=5)
        custom_D = np.vstack([
            np.ones(d.obs_data.y.shape[1]),
            d.obs_data.y_ind,
            d.obs_data.y_ind**2,
        ])
        d.create_D_basis(D_obs=custom_D)
        lamVzGroup = [0, 1, 1]
        model = setup_model(d, lamVzGroup=lamVzGroup)

        def check_param(name, val_shape, prior_dist, step_type):
            # Shared shape / prior / step-type assertions for one parameter;
            # the parameter name is passed as msg so a failure says which one.
            param = getattr(model.params, name)
            self.assertEqual(param.val_shape, val_shape, name)
            self.assertEqual(param.prior.dist, prior_dist, name)
            self.assertEqual(param.mcmc.stepType, step_type, name)
            return param

        # Check model components are set up as expected
        num = model.num
        self.assertFalse(num.scalar_out)
        self.assertFalse(num.sim_only)
        self.assertEqual(num.m, 100)
        self.assertEqual(num.n, 1)
        self.assertEqual(num.p, 1)
        self.assertEqual(num.q, 3)
        self.assertEqual(num.pu, 5)
        self.assertEqual(num.pv, 3)

        # Check parameter setup -- betaU / betaV
        check_param('betaU', (num.q + num.p, num.pu), 'Beta', 'BetaRho')
        # Second dimension is 2 here: lamVzGroup uses two distinct group ids.
        check_param('betaV', (1, 2), 'Beta', 'BetaRho')

        # Check parameter setup -- precision parameters
        check_param('lamUz', (1, num.pu), 'Gamma', 'PropMH')
        check_param('lamVz', (1, 2), 'Gamma', 'PropMH')
        check_param('lamWOs', (1, 1), 'Gamma', 'PropMH')
        check_param('lamWs', (1, num.pu), 'Gamma', 'PropMH')
        check_param('lamOs', (1, 1), 'Gamma', 'PropMH')

        # Check parameter setup -- theta
        theta = check_param('theta', (1, num.q), 'Normal', 'Uniform')
        self.assertTrue(np.allclose(theta.orig_range[0], 0))
        self.assertTrue(np.allclose(theta.orig_range[1], 1))

        # Every parameter above must be registered for MCMC sampling.
        mcmc_list_names = {p.name for p in model.params.mcmcList}
        self.assertEqual(
            mcmc_list_names,
            {
                'betaU', 'betaV', 'lamUz', 'lamVz', 'lamWOs', 'lamWs', 'lamOs',
                'theta'
            })
Beispiel #18
0
    def test_multivariate_sim_and_obs_setup(self):
        """
        Tests setup for multivariate sim and obs model with D.

        A model is first set up from an untouched deep copy of the data to
        make sure setup works without explicit preprocessing.  A second model
        is set up after explicit transform / standardize / K-basis / D-basis
        calls, and both are checked to hold the same transformed data.  The
        model dimensions and each parameter's value shape, prior distribution
        and MCMC step type are then checked on the explicitly prepared model.
        """

        d = SepiaData(t_sim=self.data_dict['t_sim'],
                      y_sim=self.data_dict['y_sim'],
                      y_ind_sim=self.data_dict['y_ind_sim'],
                      y_obs=self.data_dict['y_obs'],
                      y_ind_obs=self.data_dict['y_ind_obs'])
        print(
            'Testing multivariate sim and obs SepiaModelSetup with discrep...',
            flush=True)
        print(d, flush=True)

        # Try it without doing standardization/transform/pc basis to be sure it doesn't break
        model_notrans = setup_model(copy.deepcopy(d))

        # Do explicit transformation
        d.transform_xt()
        d.standardize_y()
        d.create_K_basis(n_pc=5)
        # Exercise each way of building a D basis in turn.
        # NOTE(review): presumably each call replaces the previous basis, so
        # the custom two-row basis is what the pv == 2 check sees -- confirm.
        d.create_D_basis(type='constant')
        d.create_D_basis(type='linear')
        custom_D = np.vstack(
            [np.ones(d.obs_data.y.shape[1]), d.obs_data.y_ind])
        d.create_D_basis(D_obs=custom_D)
        model = setup_model(d)

        # Check that either way gives same transformation
        for attr in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
            self.assertTrue(
                np.allclose(getattr(model_notrans.data.sim_data, attr),
                            getattr(model.data.sim_data, attr)),
                'sim_data.%s differs' % attr)
        for attr in ('orig_y_mean', 'orig_y_sd', 'y_std'):
            self.assertTrue(
                np.allclose(getattr(model_notrans.data.obs_data, attr),
                            getattr(model.data.obs_data, attr)),
                'obs_data.%s differs' % attr)

        def check_param(name, val_shape, prior_dist, step_type):
            # Shared shape / prior / step-type assertions for one parameter;
            # the parameter name is passed as msg so a failure says which one.
            param = getattr(model.params, name)
            self.assertEqual(param.val_shape, val_shape, name)
            self.assertEqual(param.prior.dist, prior_dist, name)
            self.assertEqual(param.mcmc.stepType, step_type, name)
            return param

        # Check model components are set up as expected
        num = model.num
        self.assertFalse(num.scalar_out)
        self.assertFalse(num.sim_only)
        self.assertEqual(num.m, 100)
        self.assertEqual(num.n, 1)
        self.assertEqual(num.p, 1)
        self.assertEqual(num.q, 3)
        self.assertEqual(num.pu, 5)
        self.assertEqual(num.pv, 2)
        # TODO compute projection, then compare model.num.w against
        # model.data.sim_data.y_std and model.num.u / model.num.v against
        # model.data.obs_data.y_std.

        # Check parameter setup -- betaU / betaV
        check_param('betaU', (num.q + num.p, num.pu), 'Beta', 'BetaRho')
        check_param('betaV', (1, 1), 'Beta', 'BetaRho')

        # Check parameter setup -- precision parameters
        check_param('lamUz', (1, num.pu), 'Gamma', 'PropMH')
        check_param('lamVz', (1, 1), 'Gamma', 'PropMH')
        check_param('lamWOs', (1, 1), 'Gamma', 'PropMH')
        check_param('lamWs', (1, num.pu), 'Gamma', 'PropMH')
        check_param('lamOs', (1, 1), 'Gamma', 'PropMH')

        # Check parameter setup -- theta
        theta = check_param('theta', (1, num.q), 'Normal', 'Uniform')
        self.assertTrue(np.allclose(theta.orig_range[0], 0))
        self.assertTrue(np.allclose(theta.orig_range[1], 1))

        # Every parameter above must be registered for MCMC sampling.
        mcmc_list_names = {p.name for p in model.params.mcmcList}
        self.assertEqual(
            mcmc_list_names,
            {
                'betaU', 'betaV', 'lamUz', 'lamVz', 'lamWOs', 'lamWs', 'lamOs',
                'theta'
            })
Beispiel #19
0
    def test_predict_multi_sim_only(self):
        """
        Compares multivariate sim-only predictions against stored Matlab output.

        Reference values are read from ``data/multi_sim_only_mcmc_test.mat``
        next to this file.  If that file is missing, the Matlab engine is used
        to generate it; when generation fails the test is skipped, since
        there is nothing to compare against.

        The model is built from the data in the mat file, ``nburn + nsamp``
        MCMC iterations are run on the parameters selected by
        ``list_to_sample``, and ``wPred`` predictions at ``[0.5, 0.5]`` (with
        and without ``addResidVar``) are compared to the Matlab results with
        ``np.allclose``.
        """

        n_pc = 2
        seed = 42
        lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
        # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
        list_to_sample = [1, 2, 3, 4]
        nsamp = 100
        nburn = 10

        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print(
                'Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile multi_sim_only_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                try:
                    eng.cd(script_path)
                    eng.addpath('matlab/', nargout=0)
                    eng.multi_sim_only_mcmc_test(nsamp,
                                                 nburn,
                                                 list_to_sample,
                                                 seed,
                                                 lamWOs_init,
                                                 n_pc,
                                                 nargout=0)
                finally:
                    # Always shut the engine down, even if a call above failed.
                    eng.quit()
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Without reference data the comparison cannot run; skip
                # explicitly instead of hitting a NameError on `matfile`.
                self.skipTest(
                    'Matlab reference data unavailable and could not be '
                    'generated: %s' % e)

        y = matfile['y'].T
        y_ind = matfile['y_ind'].T
        x = matfile['x']
        data = SepiaData(x_sim=x[:, 0][:, None],
                         t_sim=x[:, 1][:, None],
                         y_sim=y,
                         y_ind_sim=y_ind)
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc=n_pc)
        print(data)

        np.random.seed(seed)
        model = setup_model(data)
        if lamWOs_init > 0:
            model.params.lamWOs.val = np.array([[lamWOs_init]])
        # Restrict sampling to the same parameters Matlab sampled (1-based).
        model.params.mcmcList = [
            model.params.mcmcList[i - 1] for i in list_to_sample
        ]
        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])

        def report_and_check(pred, key):
            # Print python vs. matlab values for one prediction and assert
            # they agree; `key` is the variable-name prefix in the mat file.
            sigma = pred.sigma.squeeze().reshape(10, 2).T

            print('Samples are:')
            print(pred.w.squeeze())
            print('Matlab Samples are:')
            print(matfile[key + '_w'].squeeze())

            print('Mu are:')
            print(pred.mu.squeeze())
            print('Matlab Mu are:')
            print(matfile[key + '_Myhat'])

            print('Sigma are:')
            print(sigma)
            print('Matlab Sigma are:')
            print(matfile[key + '_Syhat'].squeeze())

            print('Checking predicted realizations...')
            self.assertTrue(
                np.allclose(matfile[key + '_w'].squeeze(), pred.w.squeeze()),
                key + '_w mismatch')
            print('Checking predicted means...')
            self.assertTrue(
                np.allclose(matfile[key + '_Myhat'].squeeze(),
                            pred.mu.squeeze()),
                key + '_Myhat mismatch')
            print('Checking predicted sigmas...')
            self.assertTrue(
                np.allclose(matfile[key + '_Syhat'].squeeze(), sigma),
                key + '_Syhat mismatch')

        # Re-seed so the prediction draws start from the same RNG state as
        # before; the two wPred calls below must stay in this order.
        np.random.seed(seed)
        psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
        pred = wPred([0.5, 0.5],
                     psamps,
                     model.num,
                     model.data,
                     returnMuSigma=True)
        report_and_check(pred, 'pred')

        pred_arv = wPred([0.5, 0.5],
                         psamps,
                         model.num,
                         model.data,
                         addResidVar=True,
                         returnMuSigma=True)
        print('Add Residual Variance test')
        report_and_check(pred_arv, 'pred_arv')

        print('Done.')
Beispiel #20
0
# Script fragment: build a sim + obs SepiaData object from values in
# `matfile`, set up the model, and time one MCMC run.
# `matfile`, `xt_sim`, `y_sim`, `y_ind_sim` and `y_obs` are defined earlier in
# the script, outside this excerpt.
y_ind_obs = matfile['y_ind_obs'].squeeze()
x_obs = matfile['x_obs']
# Column 0 of xt_sim is passed as x_sim and column 1 as t_sim; [:, None]
# appends an axis to each selected column.
data = SepiaData(x_sim=xt_sim[:, 0][:, None],
                 t_sim=xt_sim[:, 1][:, None],
                 y_sim=y_sim,
                 y_ind_sim=y_ind_sim,
                 x_obs=x_obs,
                 y_obs=y_obs,
                 y_ind_obs=y_ind_obs)
data.standardize_y()
data.transform_xt()
data.create_K_basis(n_pc=2)
# The stored 'Dobs' entry is transposed before being used as the D basis.
data.create_D_basis(D_obs=matfile['Dobs'].T)
print(data)

model = setup_model(data)

# Iteration counts are taken from the same mapping as the data.
# NOTE(review): if `matfile` comes from scipy.io.loadmat these entries are
# arrays, and int() on a non-0-d array is deprecated in recent NumPy -- confirm.
nsamp = int(matfile['nsamp'])
nburn = int(matfile['nburn'])


@timeit
def run_mcmc():
    """Run nburn + nsamp MCMC iterations on the module-level model."""
    model.do_mcmc(nburn + nsamp)


print('Python mcmc time:')
run_mcmc()

# Optional profiling of the same run, left disabled:
# import cProfile
# cProfile.run('run_mcmc()', 'mcmc.profile')
Beispiel #21
0
# Script fragment: set up a sim + obs model with a supplied observation error,
# run MCMC, and pickle the samples and the model.
# `design`, `y_sim`, `y_obs`, `y_ind` and `n_features` are defined earlier in
# the script, outside this excerpt.
# The same y_ind is used for both the simulation and observation indices.
data = SepiaData(t_sim=design,
                 y_sim=y_sim,
                 y_ind_sim=y_ind,
                 y_obs=y_obs,
                 y_ind_obs=y_ind)
data.standardize_y()
data.transform_xt()
# presumably the positional argument is the number of principal components
# (passed as n_pc= in the keyword calls elsewhere) -- TODO confirm
data.create_K_basis(n_features - 1)
print(data)

# Setup model
# We have a known observation error
# Diagonal matrix built from 0.01 * y_obs divided by the squared original
# simulation standard deviation.
Sigy = np.diag(
    np.squeeze(
        (0.01 * np.ones(n_features) * y_obs) / data.sim_data.orig_y_sd**2))
model = setup_model(data, Sigy)

# Modify priors to match Matlab
model.params.lamWs.prior.bounds[1] = np.inf
# NOTE(review): 11 is hard-coded; presumably it equals n_features - 1, the
# value passed to create_K_basis above -- confirm.
model.params.lamWs.prior.params = [np.ones((1, 11)), np.zeros((1, 11))]

# Do mcmc
model.tune_step_sizes(100, 25)
model.do_mcmc(10000)
samples_dict = model.get_samples()

# Persist the sample dictionary and the model object.
# NOTE(review): the file name says "1-5000" while do_mcmc is called with
# 10000 above -- verify the name is still accurate.
with open('data/sepia_mcmc_samples1-5000.pkl', 'wb') as f:
    pickle.dump(samples_dict, f)

with open('data/sepia_model.pkl', 'wb') as f:
    pickle.dump(model, f)