Example #1
0
    def test_univariate_sim_only_x_only(self):
        """
        Checks a univariate, simulation-only SepiaData built from x inputs alone (t_sim is None).

        Covers the sim-only/scalar-output flags, the default x transform, both y
        standardization modes, and finally the K and D basis calls.
        """
        n_sim = 700  # number of simulated observations
        n_x = 3  # dimension of x (simulation inputs)

        # Draw order (x first, then y) is kept so the random stream is consumed the same way.
        x_sim = 0.5 * np.random.uniform(-1, 3, (n_sim, n_x))
        y_sim = 5 * np.random.normal(0, 1, n_sim) + 2
        data = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=None)

        print('Testing univariate sim-only SepiaData...')
        print(data)
        self.assertTrue(data.obs_data is None)
        self.assertTrue(data.sim_only)
        self.assertTrue(data.scalar_out)

        # Default transform: every x column should span exactly [0, 1].
        data.transform_xt()
        self.assertTrue(np.all(np.min(data.sim_data.x_trans, axis=0) == 0))
        self.assertTrue(np.all(np.max(data.sim_data.x_trans, axis=0) == 1))

        # With centering and scaling switched off, y must pass through untouched.
        data.standardize_y(center=False, scale=False)
        self.assertEqual(data.sim_data.orig_y_sd, 1)
        self.assertEqual(data.sim_data.orig_y_mean, 0)
        self.assertTrue(np.allclose(data.sim_data.y, data.sim_data.y_std))

        # Columnwise scaling: recovered sd should be near the generating sd of 5.
        data.standardize_y(scale='columnwise')
        self.assertTrue(np.allclose(data.sim_data.orig_y_sd, 5, rtol=0.1))
        self.assertTrue(np.allclose(np.mean(data.sim_data.y_std, axis=0), 0, rtol=0.1))
        self.assertTrue(np.allclose(np.std(data.sim_data.y_std, axis=0), 1, rtol=0.1))
        self.assertTrue(data.sim_data.y.shape == data.sim_data.y_std.shape)

        # Basis creation is only exercised here; nothing is asserted about the result.
        data.create_K_basis(10)
        data.create_D_basis()
Example #2
0
    def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
        """
        Builds three univariate and three multivariate sim-and-obs models for the tests.

        Every pass of the loop calls the generators with the same arguments (including seed).
        The resulting models are stored in self.univ_model_list and self.multi_model_list,
        and the hierarchical index array in self.hier_idx.
        """
        n_hier = 3
        self.hier_idx = np.array([[0, 0, 0]])
        #self.hier_idx = np.array([[1, 1, 1], [2, -1, 2]]) # TODO this fails for multivariate; cant use for univariate now
        univ_data_list = []
        multi_data_list = []
        for _ in range(n_hier):
            multi_gen = generate_data.generate_multi_sim_and_obs(
                m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs,
                n_theta=n_theta, n_basis=n_basis, sig_n=sig_n, seed=seed)
            univ_gen = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

            # Univariate data: scalar output, so no basis is created.
            univ_data = SepiaData(t_sim=univ_gen['t_sim'],
                                  y_sim=univ_gen['y_sim'],
                                  y_obs=univ_gen['y_obs'])
            univ_data.transform_xt()
            univ_data.standardize_y()
            univ_data_list.append(univ_data)

            # Multivariate data: also gets a 5-component K basis and a constant D basis.
            multi_data = SepiaData(t_sim=multi_gen['t_sim'],
                                   y_sim=multi_gen['y_sim'],
                                   y_ind_sim=multi_gen['y_ind_sim'],
                                   y_obs=multi_gen['y_obs'],
                                   y_ind_obs=multi_gen['y_ind_obs'])
            multi_data.transform_xt()
            multi_data.standardize_y()
            multi_data.create_K_basis(5)
            multi_data.create_D_basis('constant')
            multi_data_list.append(multi_data)

        self.univ_model_list = [SepiaModel(data) for data in univ_data_list]
        self.multi_model_list = [SepiaModel(data) for data in multi_data_list]
Example #3
0
    def test_univariate_sim_only_setup(self):
        """
        Checks model setup for a univariate, simulation-only data set.

        Compares a model built from untransformed data with one built after explicit
        transform/standardize calls, then checks the model dimensions and the
        shape/prior/step type of each sampled parameter.
        """
        data = SepiaData(t_sim=self.data_dict['t_sim'], y_sim=self.data_dict['y_sim'])
        print('Testing univariate sim-only SepiaModelSetup...', flush=True)
        print(data, flush=True)

        # Build one model from a pristine copy to be sure skipping the transforms doesn't break
        model_notrans = setup_model(copy.deepcopy(data))

        # ...and one after doing the transformation explicitly
        data.transform_xt()
        data.standardize_y()
        model = setup_model(data)

        # Both routes must end up with the same transformation
        sim_notrans = model_notrans.data.sim_data
        sim_trans = model.data.sim_data
        self.assertTrue(np.allclose(sim_notrans.orig_y_mean, sim_trans.orig_y_mean))
        self.assertTrue(np.allclose(sim_notrans.orig_y_sd, sim_trans.orig_y_sd))
        self.assertTrue(np.allclose(sim_notrans.y_std, sim_trans.y_std))
        self.assertTrue(np.allclose(sim_notrans.t_trans, sim_trans.t_trans))

        # Model components
        self.assertTrue(model.num.scalar_out)
        self.assertTrue(model.num.sim_only)
        self.assertTrue(model.num.m == 100)
        self.assertTrue(model.num.n == 0)
        self.assertTrue(model.num.p == 1)
        self.assertTrue(model.num.q == 1)
        self.assertTrue(model.num.pu == 1)
        self.assertTrue(model.num.pv == 0)
        self.assertTrue(np.allclose(model.num.w, model.data.sim_data.y_std))

        # Parameter setup -- betaU
        betaU = model.params.betaU
        self.assertTrue(betaU.val_shape == (2, 1))
        self.assertTrue(betaU.prior.dist == 'Beta')
        self.assertTrue(betaU.mcmc.stepType == 'BetaRho')

        # Parameter setup -- lamUz, lamWOs, lamWs all share the same expectations
        for lam_name in ('lamUz', 'lamWOs', 'lamWs'):
            lam = getattr(model.params, lam_name)
            self.assertTrue(lam.val_shape == (1, 1))
            self.assertTrue(lam.prior.dist == 'Gamma')
            self.assertTrue(lam.mcmc.stepType == 'PropMH')

        # The MCMC list must hold exactly these four parameters
        sampled_names = {param.name for param in model.params.mcmcList}
        self.assertTrue(sampled_names == {'betaU', 'lamUz', 'lamWOs', 'lamWs'})
Example #4
0
def setup_univ_sim_only(m=300,
                        seed=42.,
                        n_lik=0,
                        n_mcmc=0,
                        n_pred=0,
                        n_lev=0,
                        n_burn=0,
                        sens=0):
    """
    Runs the Matlab function ``setup_univ_sim_only`` and builds the matching Python model.

    All arguments are forwarded unchanged, in order, to the Matlab function.
    The returned struct must provide 'y' and 'xt'. Column 0 of 'xt' is used as
    x_sim and column 1 as t_sim.

    :return: tuple (model, res) -- the SepiaModel and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_only(m,
                                      seed,
                                      n_lik,
                                      n_mcmc,
                                      n_pred,
                                      n_lev,
                                      n_burn,
                                      sens,
                                      nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    data = SepiaData(x_sim=xt[:, 0][:, None], t_sim=xt[:, 1][:, None], y_sim=y)
    print(data)
    data.standardize_y()
    data.transform_xt()
    model = SepiaModel(data)
    return model, res
Example #5
0
def setup_multi_sim_and_obs_sharedtheta(m=100,
                                        n=10,
                                        nt_sim=20,
                                        nt_obs=15,
                                        noise_sd=0.1,
                                        nx=5,
                                        n_pc=10,
                                        seed=42.,
                                        n_lik=0,
                                        n_mcmc=0,
                                        n_pred=0,
                                        n_shared=2,
                                        clist=[],
                                        fix_K=False):
    """
    Runs the Matlab function ``setup_multi_sim_and_obs_sharedtheta`` and builds one model per shared set.

    Arguments m through n_shared are forwarded unchanged, in order, to the Matlab function.
    clist is wrapped in matlab.double before being passed (it is never mutated here).
    fix_K is accepted but not used in this function.

    The returned struct must provide 'y', 'y_ind', 'xt', 'y_obs', 'y_ind_obs' and 'x_obs'.
    Their last axis is indexed by model number. Each model also gets an n_pc-component K basis.

    :return: tuple (model_list, res) -- n_shared SepiaModel objects and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_sharedtheta(m,
                                                      n,
                                                      nt_sim,
                                                      nt_obs,
                                                      noise_sd,
                                                      nx,
                                                      n_pc,
                                                      seed,
                                                      n_lik,
                                                      n_mcmc,
                                                      n_pred,
                                                      n_shared,
                                                      matlab.double(clist),
                                                      nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)  # (m, nt_sim, n_shared)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()  # (nt_sim, n_shared)
    xt = np.array(res['xt'], dtype=float)  # (m, nx, n_shared)
    y_obs = np.array(res['y_obs'], dtype=float)  # (n, nt_sim, n_shared)
    y_ind_obs = np.array(res['y_ind_obs'],
                         dtype=float).squeeze()  # (nt_obs, n_shared)
    x_obs = np.array(res['x_obs'], dtype=float)  # (n, 1, n_shared)
    model_list = []
    for i in range(n_shared):
        # Slice out the i-th data set: first xt column is x, the remaining columns are t.
        data = SepiaData(x_sim=xt[:, 0, i][:, None],
                         t_sim=xt[:, 1:, i],
                         y_sim=y[:, :, i],
                         y_ind_sim=y_ind[:, i],
                         x_obs=x_obs[:, :, i],
                         y_obs=y_obs[:, :, i],
                         y_ind_obs=y_ind_obs[:, i])
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc)
        model = SepiaModel(data)
        model_list.append(model)
    return model_list, res
Example #6
0
def setup_univ_sim_and_obs(m=100, n=50, seed=42., n_lik=0, n_mcmc=0, n_pred=0):
    """
    Runs the Matlab function ``setup_univ_sim_and_obs`` and builds the matching Python model.

    All arguments are forwarded unchanged, in order, to the Matlab function.
    n is also used to reshape the returned 'x_obs' to (n, 1).
    The returned struct must provide 'y', 'xt', 'y_obs' and 'x_obs'. Column 0 of 'xt' is
    used as x_sim and column 1 as t_sim.

    :return: tuple (model, res) -- the SepiaModel and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_univ_sim_and_obs(m,
                                         n,
                                         seed,
                                         n_lik,
                                         n_mcmc,
                                         n_pred,
                                         nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    x_obs = np.array(res['x_obs'], dtype=float).reshape((n, 1))
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1][:, None],
                     y_sim=y,
                     x_obs=x_obs,
                     y_obs=y_obs)
    data.standardize_y()
    data.transform_xt()
    print(data)
    model = SepiaModel(data)
    return model, res
Example #7
0
    def test_emulator_setup_loglik(self):
        """
        Checks that the regular and the separable (xt_sim_sep) emulator models give the same log likelihood.

        Both data sets get an identity K basis, untransformed x, and columnwise y scaling.
        """
        identity_basis = np.eye(2)

        # Regular model
        regular_data = SepiaData(x_sim=self.x_sim,
                                 y_sim=self.y_sim,
                                 y_ind_sim=np.array([0, 1]))
        regular_data.create_K_basis(K=identity_basis)
        regular_data.transform_xt(x_notrans=True)
        regular_data.standardize_y(scale='columnwise')
        print(regular_data)
        regular_model = SepiaModel(regular_data)
        print('Emulator model LL=%f \n' % compute_log_lik(regular_model))

        # Kronecker-separable model
        kron_data = SepiaData(xt_sim_sep=self.x_sim_kron,
                              y_sim=self.y_sim,
                              y_ind_sim=self.y_ind_sim)
        kron_data.create_K_basis(K=np.eye(2))
        kron_data.transform_xt(x_notrans=True)
        kron_data.standardize_y(scale='columnwise')
        print(kron_data)
        kron_model = SepiaModel(kron_data)
        print('Emulator Sep model LL=%f \n' % compute_log_lik(kron_model))

        # Likelihoods are recomputed here rather than reused from the prints above.
        regular_ll = compute_log_lik(regular_model)
        kron_ll = compute_log_lik(kron_model)
        self.assertAlmostEqual(regular_ll, kron_ll, places=5)
    def test_univariate_sim_and_obs(self):
        """
        Checks a univariate SepiaData holding both simulations and observations, with x and t inputs.

        Covers the flags, the default and a custom-range x/t transform, both y
        standardization modes on sim and obs data, and finally the K and D basis calls.
        """
        n_sim = 700  # number of simulated observations
        n_x = 3  # dimension of x (sim/obs inputs)
        n_t = 2  # dimension of t (extra sim inputs)
        n_obs = 5  # number of observed observations

        # The draw order below is kept fixed so the random stream is consumed identically.
        x_sim = np.random.uniform(-1, 3, (n_sim, n_x))
        t_sim = np.random.uniform(-10, 10, (n_sim, n_t))
        x_obs = np.random.uniform(-1.5, 3.5, (n_obs, n_x))
        y_sim = 5 * np.random.normal(0, 1, n_sim) + 2
        y_obs = 5 * np.random.normal(0, 1, n_obs) + 1
        data = SepiaData(x_sim=x_sim,
                         y_sim=y_sim,
                         t_sim=t_sim,
                         x_obs=x_obs,
                         y_obs=y_obs)

        print('Testing univariate sim and obs SepiaData...')
        print(data)
        self.assertTrue(data.obs_data is not None)
        self.assertTrue(not data.sim_only)
        self.assertTrue(data.scalar_out)

        # Default transform: x and t columns span exactly [0, 1].
        data.transform_xt()
        self.assertTrue(np.all(np.min(data.sim_data.x_trans, axis=0) == 0))
        self.assertTrue(np.all(np.max(data.sim_data.x_trans, axis=0) == 1))
        self.assertTrue(np.all(np.min(data.sim_data.t_trans, axis=0) == 0))
        self.assertTrue(np.all(np.max(data.sim_data.t_trans, axis=0) == 1))

        # Custom range: x and t columns span exactly [-10, 10].
        data.transform_xt(-10, 10)
        self.assertTrue(np.all(np.min(data.sim_data.x_trans, axis=0) == -10))
        self.assertTrue(np.all(np.max(data.sim_data.x_trans, axis=0) == 10))
        self.assertTrue(np.all(np.min(data.sim_data.t_trans, axis=0) == -10))
        self.assertTrue(np.all(np.max(data.sim_data.t_trans, axis=0) == 10))

        # No centering/scaling: y passes through untouched for sim and obs.
        data.standardize_y(center=False, scale=False)
        self.assertEqual(data.sim_data.orig_y_sd, 1)
        self.assertEqual(data.sim_data.orig_y_mean, 0)
        self.assertTrue(np.allclose(data.sim_data.y, data.sim_data.y_std))
        self.assertEqual(data.obs_data.orig_y_sd, 1)
        self.assertEqual(data.obs_data.orig_y_mean, 0)
        self.assertTrue(np.allclose(data.obs_data.y, data.obs_data.y_std))

        # Columnwise scaling: obs data is expected to carry the same mean/sd as the sims.
        data.standardize_y(scale='columnwise')
        self.assertTrue(np.allclose(data.sim_data.orig_y_sd, 5, rtol=0.1))
        self.assertTrue(np.allclose(data.sim_data.orig_y_mean, 2, rtol=0.1))
        self.assertTrue(np.allclose(np.mean(data.sim_data.y_std, axis=0), 0, rtol=0.1))
        self.assertTrue(np.allclose(np.std(data.sim_data.y_std, axis=0), 1, rtol=0.1))
        self.assertTrue(data.sim_data.y.shape == data.sim_data.y_std.shape)
        self.assertTrue(np.allclose(data.obs_data.orig_y_sd, 5, rtol=0.1))
        self.assertTrue(np.allclose(data.obs_data.orig_y_mean, 2, rtol=0.1))
        self.assertTrue(data.obs_data.y.shape == data.obs_data.y_std.shape)

        # Basis creation is only exercised here; nothing is asserted about the result.
        data.create_K_basis(10)
        data.create_D_basis()
Example #9
0
def setup_multi_sim_and_obs(m=100,
                            n=10,
                            nt_sim=20,
                            nt_obs=15,
                            noise_sd=0.1,
                            nx=5,
                            n_pc=10,
                            seed=42.,
                            n_lik=0,
                            n_mcmc=0,
                            n_pred=0,
                            fix_K=False):
    """
    Runs the Matlab function ``setup_multi_sim_and_obs`` and builds the matching Python model.

    Arguments m through n_pred are forwarded unchanged, in order, to the Matlab function.
    The returned struct must provide 'y', 'y_ind', 'xt', 'y_obs', 'y_ind_obs' and 'x_obs',
    plus 'K' when fix_K is true. The data gets an n_pc-component K basis and a constant D basis.

    :param fix_K: if true, pass the transposed Matlab 'K' to create_K_basis instead of computing it
    :return: tuple (model, res) -- the SepiaModel and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs(m,
                                          n,
                                          nt_sim,
                                          nt_obs,
                                          noise_sd,
                                          nx,
                                          n_pc,
                                          seed,
                                          n_lik,
                                          n_mcmc,
                                          n_pred,
                                          nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    # First xt column is x, the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    if fix_K:  # means use the K from matlab - avoid issues with positive/negative component ambiguity
        data.create_K_basis(n_pc, K=np.array(res['K']).T)
    else:
        data.create_K_basis(n_pc)
    data.create_D_basis('constant')
    print(data)
    model = SepiaModel(data)
    return model, res
Example #10
0
def create_test_case():
    """
    Builds a small random multivariate sim-and-obs data set and saves it for cross-checking.

    Writes the raw and standardized arrays plus the bases to 'data/test_case_matlab.mat'.
    Then sets up the model and pickles it with the data to 'data/test_case_python_model.pkl'.
    Both paths are relative to the current working directory.
    """
    n_obs, n_sim = 2, 5
    p, q = 3, 4
    ell_sim, ell_obs = 80, 20

    # Simulation side: random t, constant x, and y built as quadratic + linear + offset
    # terms over the index grid, each with its own per-row random coefficient.
    t = np.random.uniform(0, 1, (n_sim, q))
    x = 0.5 * np.ones((n_sim, p))
    y_ind = np.linspace(0, 100, ell_sim)
    sim_grid = y_ind[None, :]
    y = 10 * np.random.normal(0, 1, (n_sim, 1)) * (sim_grid - 50)**2 / 75.
    y = y + 20 * np.random.normal(0, 1, (n_sim, 1)) * sim_grid
    y = y + 20 * np.random.normal(0, 1, (n_sim, 1))

    # Observation side: same construction on a shorter index grid.
    x_obs = 0.5 * np.ones((n_obs, p))
    y_obs_ind = np.linspace(10, 85, ell_obs)
    obs_grid = y_obs_ind[None, :]
    y_obs = 10 * np.random.normal(0, 1, (n_obs, 1)) * (obs_grid - 50)**2 / 75.
    y_obs = y_obs + 20 * np.random.normal(0, 1, (n_obs, 1)) * obs_grid
    y_obs = y_obs + 20 * np.random.normal(0, 1, (n_obs, 1))

    data = SepiaData(x_sim=x,
                     t_sim=t,
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_obs_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc=3)
    data.create_D_basis('constant')

    # Save as matfile for testing in matlab
    sim, obs = data.sim_data, data.obs_data
    matlab_payload = {
        't': t,
        'y': y,
        'y_obs': y_obs,
        'D': obs.D,
        'Kobs': obs.K,
        'Ksim': sim.K,
        'y_obs_std': obs.y_std,
        'y_sim_std': sim.y_std,
        'y_sd': sim.orig_y_sd,
    }
    scipy.io.savemat('data/test_case_matlab.mat', matlab_payload)

    model = setup_model(data)

    # Save pickle file of results
    python_payload = {'model': model, 'data': data}
    with open('data/test_case_python_model.pkl', 'wb') as pkl_file:
        pickle.dump(python_payload, pkl_file)
    def test_multivariate_sim_only_x_only(self):
        """
        Checks a multivariate, simulation-only SepiaData built from x inputs alone (t_sim is None).

        y is a log-shaped mean curve plus a random mix of three known basis functions.
        Covers the flags, both x transforms, both y standardization modes, and the K basis shape.
        """
        n_sim = 700  # number of simulated observations
        n_x = 3  # dimension of x (simulation inputs)
        n_y = 1000  # dimension of y output
        n_pc = 3  # number of PCs

        y_ind = np.linspace(0, 100, n_y)
        basis_rows = [
            0.5 * (np.sin(y_ind) + 1),
            np.square(-y_ind + 50) / 2500,
            y_ind / 100,
        ]
        K_true = np.vstack(basis_rows)
        # Per-component scales applied to standard normal weights (drawn before x, as originally).
        pc_scale = 2 * np.array([1, 0.5, 0.2])[:, None]
        weights = pc_scale * np.random.normal(0, 1, (n_pc, n_sim))
        mean_curve = np.log(1 + y_ind)[:, None]
        y_sim = np.transpose(mean_curve + np.dot(K_true.T, weights))
        x_sim = 0.5 * np.random.uniform(-1, 3, (n_sim, n_x))
        data = SepiaData(x_sim=x_sim, y_sim=y_sim, t_sim=None, y_ind_sim=y_ind)

        print('Testing multivariate sim-only SepiaData...')
        print(data)
        self.assertTrue(data.obs_data is None)
        self.assertTrue(data.sim_only)
        self.assertTrue(not data.scalar_out)

        # Default transform: every x column spans exactly [0, 1].
        data.transform_xt()
        self.assertTrue(np.all(np.min(data.sim_data.x_trans, axis=0) == 0))
        self.assertTrue(np.all(np.max(data.sim_data.x_trans, axis=0) == 1))

        # Custom range: every x column spans exactly [-10, 10].
        data.transform_xt(-10, 10)
        self.assertTrue(np.all(np.min(data.sim_data.x_trans, axis=0) == -10))
        self.assertTrue(np.all(np.max(data.sim_data.x_trans, axis=0) == 10))

        # No centering/scaling: y passes through untouched.
        data.standardize_y(center=False, scale=False)
        self.assertEqual(data.sim_data.orig_y_sd, 1)
        self.assertEqual(data.sim_data.orig_y_mean, 0)
        self.assertTrue(np.allclose(data.sim_data.y, data.sim_data.y_std))

        # Columnwise scaling: the recovered mean should follow the generating log curve.
        data.standardize_y(scale='columnwise')
        mean_matches = np.allclose(data.sim_data.orig_y_mean,
                                   np.log(1 + y_ind),
                                   rtol=0.1,
                                   atol=0.5)
        self.assertTrue(mean_matches)
        self.assertTrue(np.allclose(np.std(data.sim_data.y_std, axis=0), 1, rtol=0.1))
        self.assertTrue(np.allclose(np.mean(data.sim_data.y_std, axis=0), 0, rtol=0.1))
        self.assertTrue(data.sim_data.y.shape == data.sim_data.y_std.shape)

        data.create_K_basis(3)
        self.assertTrue(data.sim_data.K.shape == (n_pc, n_y))
        data.create_D_basis()
        print(data)
Example #12
0
def setup_multi_sim_and_obs_noD(m=100,
                                n=10,
                                nt_sim=20,
                                nt_obs=15,
                                noise_sd=0.1,
                                nx=5,
                                n_pc=10,
                                seed=42.,
                                n_lik=0,
                                n_mcmc=0):
    """
    Runs the Matlab function ``setup_multi_sim_and_obs_noD`` and builds the matching Python model.

    All arguments are forwarded unchanged, in order, to the Matlab function.
    The returned struct must provide 'y', 'y_ind', 'xt', 'y_obs', 'y_ind_obs' and 'x_obs'.
    The data gets an n_pc-component K basis; no D basis is created.

    :return: tuple (model, res) -- the SepiaModel and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_and_obs_noD(m,
                                              n,
                                              nt_sim,
                                              nt_obs,
                                              noise_sd,
                                              nx,
                                              n_pc,
                                              seed,
                                              n_lik,
                                              n_mcmc,
                                              nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    y_obs = np.array(res['y_obs'], dtype=float)
    y_ind_obs = np.array(res['y_ind_obs'], dtype=float).squeeze()
    x_obs = np.array(res['x_obs'], dtype=float)
    # First xt column is x, the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind,
                     x_obs=x_obs,
                     y_obs=y_obs,
                     y_ind_obs=y_ind_obs)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    print(data)
    model = SepiaModel(data)
    return model, res
    def setUp(self,
              m=20,
              n=1,
              nt_sim=30,
              nt_obs=20,
              n_theta=3,
              n_basis=5,
              sig_n=0.1,
              seed=42):
        """
        Builds the four models used by the tests from generated data.

        Stores univ_sim_only_model, univ_sim_and_obs_model, multi_sim_only_model and
        multi_sim_only_catind_model on self. The last one has an extra t column of
        random integers from 1 to 4, flagged through t_cat_ind.
        """
        multi_gen = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs,
            n_theta=n_theta, n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ_gen = generate_data.generate_univ_sim_and_obs(
            m=m, n=n, sig_n=sig_n, seed=seed)

        # Univariate, simulations only
        data = SepiaData(t_sim=univ_gen['t_sim'], y_sim=univ_gen['y_sim'])
        data.transform_xt()
        data.standardize_y()
        self.univ_sim_only_model = SepiaModel(data)

        # Univariate, simulations and observations
        data = SepiaData(t_sim=univ_gen['t_sim'],
                         y_sim=univ_gen['y_sim'],
                         y_obs=univ_gen['y_obs'])
        data.transform_xt()
        data.standardize_y()
        self.univ_sim_and_obs_model = SepiaModel(data)

        # Multivariate, simulations only
        data = SepiaData(t_sim=multi_gen['t_sim'],
                         y_sim=multi_gen['y_sim'],
                         y_ind_sim=multi_gen['y_ind_sim'])
        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        self.multi_sim_only_model = SepiaModel(data)

        # Multivariate, simulations only, with one appended integer-valued t column
        cat_column = np.random.choice(range(1, 5), (m, 1), replace=True)
        t_with_cat = np.concatenate([multi_gen['t_sim'], cat_column], axis=1)
        data = SepiaData(t_sim=t_with_cat,
                         y_sim=multi_gen['y_sim'],
                         y_ind_sim=multi_gen['y_ind_sim'],
                         t_cat_ind=[0, 0, 0, 4])
        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        self.multi_sim_only_catind_model = SepiaModel(data)
Example #14
0
def setup_multi_sim_only(m=300,
                         nt=20,
                         nx=5,
                         n_pc=10,
                         seed=42.,
                         n_lik=0,
                         n_mcmc=0,
                         n_pred=0,
                         fix_K=False,
                         sens=0):
    """
    Runs the Matlab function ``setup_multi_sim_only`` and builds the matching Python model.

    All arguments except fix_K are forwarded unchanged to the Matlab function.
    The returned struct must provide 'y', 'y_ind' and 'xt', plus 'K' when fix_K is true.

    :param fix_K: if true, overwrite the computed sim K basis with the transposed Matlab 'K'
    :return: tuple (model, res) -- the SepiaModel and the raw Matlab result
    :raises Exception: whatever the Matlab engine raised, re-raised after printing a hint
    """
    eng = None
    try:
        eng = matlab.engine.start_matlab()
        eng.cd(root_path)
        eng.addpath('matlab/', nargout=0)
        res = eng.setup_multi_sim_only(m,
                                       nt,
                                       nx,
                                       n_pc,
                                       seed,
                                       n_lik,
                                       n_mcmc,
                                       n_pred,
                                       sens,
                                       nargout=1)
    except Exception as e:
        print(e)
        print(
            'Matlab error; make sure matlab.engine installed, check Matlab code for errors.'
        )
        # Without a result there is nothing to build; falling through would only fail
        # later with an unrelated "res referenced before assignment" error.
        raise
    finally:
        # Shut the engine down on success and failure alike; a failing quit is only reported.
        if eng is not None:
            try:
                eng.quit()
            except Exception as quit_err:
                print(quit_err)
    y = np.array(res['y'], dtype=float)
    y_ind = np.array(res['y_ind'], dtype=float).squeeze()
    xt = np.array(res['xt'], dtype=float)
    # First xt column is x, the remaining columns are t.
    data = SepiaData(x_sim=xt[:, 0][:, None],
                     t_sim=xt[:, 1:],
                     y_sim=y,
                     y_ind_sim=y_ind)
    data.standardize_y()
    data.transform_xt()
    data.create_K_basis(n_pc)
    if fix_K:
        # Replace the computed basis with the one from Matlab.
        data.sim_data.K = np.array(res['K']).T
    print(data)
    model = SepiaModel(data)
    return model, res
Example #15
0
    def test_univariate_sim_only_lik(self):
        """
        Smoke-tests the log likelihood of a univariate, simulation-only model.

        Calls logLik once with no arguments, then once per element of every parameter
        in the MCMC list. Nothing is asserted; the test passes if no call raises.
        """
        data = SepiaData(t_sim=self.univ_data_dict['t_sim'],
                         y_sim=self.univ_data_dict['y_sim'])
        print('Testing univariate sim-only SepiaLogLik...', flush=True)
        print(data, flush=True)

        data.transform_xt()
        data.standardize_y()
        model = setup_model(data)

        model.logLik()

        for param in model.params.mcmcList:
            n_elements = int(np.prod(param.val_shape))
            for element_idx in range(n_elements):
                model.logLik(cvar=param.name, cindex=element_idx)
    def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
        """
        Builds the five prepared SepiaData objects used by the tests from generated data.

        Stores univ_sim_only_data, univ_sim_and_obs_data, multi_sim_only_data,
        multi_sim_and_obs_noD_data and multi_sim_and_obs_data on self. Only the last
        one gets a D basis (linear).
        """
        multi_gen = generate_data.generate_multi_sim_and_obs(
            m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs,
            n_theta=n_theta, n_basis=n_basis, sig_n=sig_n, seed=seed)
        univ_gen = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

        # Univariate, simulations only
        data = SepiaData(t_sim=univ_gen['t_sim'], y_sim=univ_gen['y_sim'])
        data.transform_xt()
        data.standardize_y()
        self.univ_sim_only_data = data

        # Univariate, simulations and observations
        data = SepiaData(t_sim=univ_gen['t_sim'],
                         y_sim=univ_gen['y_sim'],
                         y_obs=univ_gen['y_obs'])
        data.transform_xt()
        data.standardize_y()
        self.univ_sim_and_obs_data = data

        # Multivariate, simulations only
        data = SepiaData(t_sim=multi_gen['t_sim'],
                         y_sim=multi_gen['y_sim'],
                         y_ind_sim=multi_gen['y_ind_sim'])
        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        self.multi_sim_only_data = data

        # Multivariate, simulations and observations, no discrepancy basis
        data = SepiaData(t_sim=multi_gen['t_sim'],
                         y_sim=multi_gen['y_sim'],
                         y_ind_sim=multi_gen['y_ind_sim'],
                         y_obs=multi_gen['y_obs'],
                         y_ind_obs=multi_gen['y_ind_obs'])
        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        self.multi_sim_and_obs_noD_data = data

        # Multivariate, simulations and observations, linear discrepancy basis
        data = SepiaData(t_sim=multi_gen['t_sim'],
                         y_sim=multi_gen['y_sim'],
                         y_ind_sim=multi_gen['y_ind_sim'],
                         y_obs=multi_gen['y_obs'],
                         y_ind_obs=multi_gen['y_ind_obs'])
        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        data.create_D_basis('linear')
        self.multi_sim_and_obs_data = data
Example #17
0
    def test_multivariate_sim_and_obs_noD_lik(self):
        """
        Smoke-tests the log likelihood of a multivariate sim-and-obs model without a discrepancy basis.

        Calls logLik once with no arguments, then once per element of every parameter
        in the MCMC list. Nothing is asserted; the test passes if no call raises.
        """
        generated = self.multi_data_dict
        data = SepiaData(t_sim=generated['t_sim'],
                         y_sim=generated['y_sim'],
                         y_ind_sim=generated['y_ind_sim'],
                         y_obs=generated['y_obs'],
                         y_ind_obs=generated['y_ind_obs'])
        print('Testing multivariate sim-only SepiaLogLik...', flush=True)
        print(data, flush=True)

        data.transform_xt()
        data.standardize_y()
        data.create_K_basis(5)
        model = setup_model(data)

        model.logLik()

        for param in model.params.mcmcList:
            n_elements = int(np.prod(param.val_shape))
            for element_idx in range(n_elements):
                model.logLik(cvar=param.name, cindex=element_idx)
Example #18
0
    def test_predict_univ_sim_only(self):
        """
        Compares wPred output for the univariate sim-only model against stored MATLAB results.

        Reference values are read from data/univ_sim_only_mcmc_test.mat next to this file. If that
        file is missing, the MATLAB function univ_sim_only_mcmc_test is run through matlab.engine
        to create it. The test then runs nburn + nsamp MCMC iterations and checks predicted
        realizations, means and sigmas at a single input, followed by realizations over a grid
        of inputs (compared with a looser rtol).
        """
        np.random.seed(42)

        # NOTE: while this is True, a matplotlib figure is shown at the end of the test.
        show_figs = True

        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/univ_sim_only_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print('Found matfile, loading from univ_sim_only_mcmc_test.mat \n')
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile univ_sim_only_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                eng.cd(script_path)
                eng.addpath('matlab/', nargout=0)
                eng.univ_sim_only_mcmc_test(nargout=0)
                eng.quit()
                # Load the generated file through the mat_fn path (the literal string
                # 'mat_fn' was passed here before, which can never name the data file).
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Without reference data nothing below can run; surface the real cause
                # instead of failing later on an unbound `matfile`.
                raise

        y = matfile['y']
        x = matfile['x']
        t = matfile['t']
        data = SepiaData(x_sim=x, t_sim=t, y_sim=y)
        data.standardize_y()
        data.transform_xt()
        print(data)
        model = setup_model(data)
        nsamp = int(matfile['nsamp'])
        nburn = int(matfile['nburn'])
        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])
        # Creates dict with each sampled variable name as key, array of samples (nsamp, ...) as value
        samples = model.get_samples(nburn)
        log_post = np.array(model.params.lp.mcmc.draws)

        # Re-seed so the prediction draws start from the same random state on every run
        np.random.seed(42)
        psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
        pred = wPred([0.5, 0.5],
                     psamps,
                     model.num,
                     model.data,
                     returnMuSigma=True)
        print('Samples are:')
        print(pred.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_w'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_Myhat'].squeeze())

        print('Sigma are:')
        print(pred.sigma.squeeze())
        print('Matlab Sigma are:')
        print(matfile['pred_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_Syhat'].squeeze(), pred.sigma.squeeze()))

        # Prediction with multiple realizations
        np.random.seed(42)
        # Zero-based indices of every 100th sample: 99, 199, ..., 999
        sampleset = np.arange(100, 1001, 100) - 1
        samples = model.get_samples(sampleset=sampleset)
        nq = 10
        t = np.linspace(0, 1, nq)
        # First column fixed at 0.5, second column sweeps the grid t
        xpred = np.column_stack((np.ones((nq, 1)) * 0.5, t))
        pred_plot = wPred(xpred, samples, model.num, model.data)

        print('pred_plot_w are:')
        print(pred_plot.w.squeeze()[0, :])
        print('Matlab pred_plot_w are:')
        print(matfile['pred_plot_w'].squeeze()[0, :])

        print('Checking predicted realizations for plotting...')
        # Apparently numerics come into play here, need to turn down the rtol on 'close'
        self.assertTrue(
            np.allclose(matfile['pred_plot_w'].squeeze(),
                        pred_plot.w.squeeze(),
                        rtol=1e-3))

        print('Done.')

        if show_figs:
            import matplotlib.pyplot as plt
            plt.figure()
            plt.plot(data.sim_data.t_trans, data.sim_data.y_std)
            plt.plot(np.tile(t, (len(sampleset), 1)), np.squeeze(pred_plot.w),
                     '.')
            plt.show()
Example #19
0
    def test_predict_uv_from_multi_obs(self):
        """
        Compares uvPred output for the multivariate sim and obs model against stored MATLAB results.

        Reference values are read from data/multi_sim_and_obs_mcmc_test.mat next to this file. If
        that file is missing, the MATLAB function multi_sim_and_obs_mcmc_test is run through
        matlab.engine to create it. After nburn + nsamp MCMC iterations the predicted u, v, mean
        and sigma are checked against the MATLAB values.
        """
        n_pc = 2
        seed = 42.  # float for the MATLAB call; cast to int before seeding numpy
        lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
        nsamp = 100
        nburn = 0
        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/multi_sim_and_obs_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print(
                'Found matfile, loading from multi_sim_and_obs_mcmc_test.mat \n'
            )
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile multi_sim_and_obs_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                eng.cd(script_path)
                eng.addpath('matlab/', nargout=0)
                eng.multi_sim_and_obs_mcmc_test(nsamp,
                                                nburn,
                                                seed,
                                                lamWOs_init,
                                                n_pc,
                                                0,
                                                nargout=0)
                eng.quit()
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Without reference data nothing below can run; surface the real cause
                # instead of failing later on an unbound `matfile`.
                raise

        # The values stored in the file take precedence over the defaults above
        nburn = int(matfile['nburn'])
        nsamp = int(matfile['nsamp'])

        y_sim = matfile['y'].T
        y_ind_sim = matfile['y_ind'].squeeze()
        xt_sim = matfile['x']
        y_obs = matfile['y_obs']
        y_ind_obs = matfile['y_ind_obs'].squeeze()
        x_obs = matfile['x_obs']
        # Column 0 of the stored design is passed as x, column 1 as t
        data = SepiaData(x_sim=xt_sim[:, 0][:, None],
                         t_sim=xt_sim[:, 1][:, None],
                         y_sim=y_sim,
                         y_ind_sim=y_ind_sim,
                         x_obs=x_obs,
                         y_obs=y_obs,
                         y_ind_obs=y_ind_obs)
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc=n_pc)
        # NOTE(review): other callers pass this basis as D_obs=...; confirm which keyword
        # the installed create_D_basis accepts.
        data.create_D_basis(D=matfile['Dobs'].T)
        print(data)

        np.random.seed(int(seed))
        model = setup_model(data)
        if lamWOs_init > 0:
            model.params.lamWOs.val = np.array([[lamWOs_init]])

        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])

        # Re-seed so the prediction draws start from the same random state on every run
        np.random.seed(int(seed))
        psamps = model.get_samples(0, sampleset=[0, 4], flat=True)
        #pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True, useAltW=True)
        pred = uvPred([0.5], psamps, model.num, model.data, returnMuSigma=True)
        print('Samples of u are:')
        print(pred.u.squeeze())
        print('Matlab Samples of u are:')
        print(matfile['pred2_u'].squeeze())

        print('Samples of v are:')
        print(pred.v.squeeze())
        print('Matlab Samples of v are:')
        print(matfile['pred2_v'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred2_Myhat'])

        # Sigma is rearranged with reshape(14, 7).T before comparison with the MATLAB array
        print('Sigma are:')
        print(pred.sigma.squeeze().reshape(14, 7).T)
        print('Matlab Sigma are:')
        print(matfile['pred2_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred2_u'].squeeze(), pred.u.squeeze()))
        self.assertTrue(
            np.allclose(matfile['pred2_v'].squeeze(), pred.v.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred2_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred2_Syhat'].squeeze(),
                        pred.sigma.squeeze().reshape(14, 7).T))

        print('Done.')
Example #20
0
# Script excerpt: wrap synthetic sim/obs arrays in a SepiaData object, plot them, preprocess,
# and build a model. `data_dict`, `plt`, `SepiaData` and `SepiaModel` are defined outside this excerpt.
data = SepiaData(t_sim=data_dict['t_sim'], y_sim=data_dict['y_sim'], y_ind_sim=data_dict['y_ind_sim'],
                 y_obs=data_dict['y_obs'], y_ind_obs=data_dict['y_ind_obs'])

print(data)

# Simulated outputs as lines against their y index; observations overlaid as black dots
plt.plot(data.sim_data.y_ind, data.sim_data.y.T)
plt.plot(data.obs_data.y_ind, data.obs_data.y.T, 'k.', linewidth=3)
plt.title('Synthetic data (obs. in black)')
plt.xlabel('y index')
plt.ylabel('y')
plt.show()

#%%

# Preprocessing: transform inputs, standardize y column by column,
# then create the K basis (argument 5) and a linear D basis
data.transform_xt()
data.standardize_y(scale='columnwise')
data.create_K_basis(5)
data.create_D_basis(type='linear')

print(data)

#%%

# Build the model from the preprocessed data
model = SepiaModel(data)

#%%

# File name for a pickle cache; how it is used is not shown in this excerpt
cachefile_name='multivariate_example_with_prediction.pkl'
import os.path
import pickle
# Script excerpt: load simulation files, build a SepiaData object with observations, and
# prepare ragged observation lists. `datadir`, `x_obs`, `y_obs`, `h_obs` and `field_data`
# are defined outside this excerpt.
# simulated data
with open(datadir+'desNative80x4Cg.txt','r') as f:
    sim_data = np.loadtxt(f)
x_sim = sim_data[:,0:2] # x = {R, rho_ball}
t_sim = sim_data[:,2:4] # t = {C, g}
with open(datadir+'simHeights101x1','r') as f:
    h_sim = np.loadtxt(f)
with open(datadir+'sims101x80Cg.txt','r') as f:
    y_sim = np.loadtxt(f).T  # transposed after loading

# create sepia data object
data = SepiaData(x_sim = x_sim, t_sim = t_sim, y_ind_sim = h_sim, y_sim = y_sim,\
                 x_obs = x_obs, y_obs = y_obs, y_ind_obs = h_obs)
# Preprocess: transform inputs, standardize y, then K basis (argument 3) and linear D basis
data.transform_xt()
data.standardize_y()
data.create_K_basis(3)
data.create_D_basis('linear')
print(data)
model = setup_model(data)

#%% Ragged data and model setup
# Split field_data rows into four groups (rows 0-2, 3-5, 6-8, 9 onward);
# column 4 supplies the observed values and column 3 the matching heights.
y_obs_ragged = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\
         np.array(field_data[6:9,4]),np.array(field_data[9:,4])]
h_obs_ragged = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\
         np.array(field_data[6:9,3]),np.array(field_data[9:,3])]# observed heights

# Alternative (disabled) grouping that picks rows 7, 9, 11 for the third group:
#y_obs = [np.array(field_data[0:3,4]),np.array(field_data[3:6,4]),\
#         np.array(field_data[[7,9,11],4]),np.array(field_data[12:,4])]
#h_obs = [np.array(field_data[0:3,3]),np.array(field_data[3:6,3]),\
#         np.array(field_data[[7,9,11],3]),np.array(field_data[12:,3])]# observed heights
Example #22
0
                 y_obs=data_dict['y_obs'],
                 y_ind_obs=data_dict['y_ind_obs'])

# Script excerpt: plot the synthetic data held in `data`, preprocess it, and set up a model.
# `data`, `plt` and `setup_model` are defined outside this excerpt.
print(data)

# Simulated outputs as lines against their y index; observations overlaid as black dots
plt.plot(data.sim_data.y_ind, data.sim_data.y.T)
plt.plot(data.obs_data.y_ind, data.obs_data.y.T, 'k.', linewidth=3)
plt.title('Synthetic data (obs. in black)')
plt.xlabel('y index')
plt.ylabel('y')
plt.show()

#%%

# Preprocessing: transform inputs, standardize y with the 'columnwise' option,
# then create the K basis (argument 5) and a linear D basis
data.transform_xt()
data.standardize_y('columnwise')
data.create_K_basis(5)
data.create_D_basis(type='linear')

print(data)

#%%

# Build the model from the preprocessed data
model = setup_model(data)

#%%

# File name for a pickle cache; how it is used is not shown in this excerpt
cachefile_name = 'multivariate_example_with_prediction.pkl'
import os.path
import pickle
Example #23
0
    def test_multivariate_sim_and_obs_lamVzGroups_setup(self):
        """
        Verifies parameter setup of a multivariate sim and obs model that is built
        with a three-row custom D basis and lamVzGroup=[0, 1, 1].
        """
        src = self.data_dict
        d = SepiaData(t_sim=src['t_sim'],
                      y_sim=src['y_sim'],
                      y_ind_sim=src['y_ind_sim'],
                      y_obs=src['y_obs'],
                      y_ind_obs=src['y_ind_obs'])
        print('Testing multivariate sim and obs SepiaModelSetup with discrep...', flush=True)
        print(d, flush=True)

        # Preprocess explicitly before handing the data to the model
        d.transform_xt()
        d.standardize_y()
        d.create_K_basis(n_pc=5)

        # Custom D basis rows: constant, y index, squared y index
        const_row = np.ones(d.obs_data.y.shape[1])
        obs_ind = d.obs_data.y_ind
        d.create_D_basis(D_obs=np.vstack([const_row, obs_ind, obs_ind**2]))

        model = setup_model(d, lamVzGroup=[0, 1, 1])
        num = model.num
        params = model.params

        # Model dimensions and flags
        self.assertTrue(not num.scalar_out)
        self.assertTrue(not num.sim_only)
        expected_dims = (('m', 100), ('n', 1), ('p', 1), ('q', 3), ('pu', 5), ('pv', 3))
        for attr, expected in expected_dims:
            self.assertTrue(getattr(num, attr) == expected)

        def check_param(param, shape, dist, step):
            # Shape, prior family and MCMC step type of one parameter
            self.assertTrue(param.val_shape == shape)
            self.assertTrue(param.prior.dist == dist)
            self.assertTrue(param.mcmc.stepType == step)

        # Correlation parameters: betaU, then betaV (two columns for the two lamVz groups)
        check_param(params.betaU, (num.q + num.p, num.pu), 'Beta', 'BetaRho')
        check_param(params.betaV, (1, 2), 'Beta', 'BetaRho')

        # Precision parameters, all with Gamma priors and PropMH steps:
        # lamUz, lamVz, lamWOs, lamWs, lamOs
        gamma_shapes = (
            ('lamUz', (1, num.pu)),
            ('lamVz', (1, 2)),
            ('lamWOs', (1, 1)),
            ('lamWs', (1, num.pu)),
            ('lamOs', (1, 1)),
        )
        for name, shape in gamma_shapes:
            check_param(getattr(params, name), shape, 'Gamma', 'PropMH')

        # theta, including its original range of [0, 1]
        theta = params.theta
        check_param(theta, (1, num.q), 'Normal', 'Uniform')
        self.assertTrue(np.allclose(theta.orig_range[0], 0))
        self.assertTrue(np.allclose(theta.orig_range[1], 1))

        # Exactly these parameters should be in the MCMC list
        sampled_names = {p.name for p in params.mcmcList}
        self.assertTrue(sampled_names == {
            'betaU', 'betaV', 'lamUz', 'lamVz', 'lamWOs', 'lamWs', 'lamOs',
            'theta'
        })
Example #24
0
    def test_predict_multi_sim_only(self):
        """
        Compares wPred output for the multivariate sim-only model against stored MATLAB results.

        Reference values are read from data/multi_sim_only_mcmc_test.mat next to this file. If
        that file is missing, the MATLAB function multi_sim_only_mcmc_test is run through
        matlab.engine to create it. After nburn + nsamp MCMC iterations the predicted
        realizations, means and sigmas are checked twice: once with default settings and once
        with addResidVar=True.
        """
        n_pc = 2
        seed = 42
        lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
        list_to_sample = [
            1, 2, 3, 4
        ]  # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
        nsamp = 100
        nburn = 10
        # Open data from matlab
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print(
                'Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile multi_sim_only_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                eng.cd(script_path)
                eng.addpath('matlab/', nargout=0)
                eng.multi_sim_only_mcmc_test(nsamp,
                                             nburn,
                                             list_to_sample,
                                             seed,
                                             lamWOs_init,
                                             n_pc,
                                             nargout=0)
                eng.quit()
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Without reference data nothing below can run; surface the real cause
                # instead of failing later on an unbound `matfile`.
                raise

        y = matfile['y'].T
        y_ind = matfile['y_ind'].T
        x = matfile['x']
        # Column 0 of the stored design is passed as x, column 1 as t
        data = SepiaData(x_sim=x[:, 0][:, None],
                         t_sim=x[:, 1][:, None],
                         y_sim=y,
                         y_ind_sim=y_ind)
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc=n_pc)
        print(data)

        np.random.seed(int(seed))
        model = setup_model(data)
        if lamWOs_init > 0:
            model.params.lamWOs.val = np.array([[lamWOs_init]])
        # Restrict and order the sampled parameters like the MATLAB run (1-based -> 0-based)
        model.params.mcmcList = [
            model.params.mcmcList[i - 1] for i in list_to_sample
        ]
        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        print('Matlab mcmc time %0.3g s' % matfile['mcmc_time'])

        # Re-seed so the prediction draws start from the same random state on every run
        np.random.seed(seed)
        psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
        pred = wPred([0.5, 0.5],
                     psamps,
                     model.num,
                     model.data,
                     returnMuSigma=True)
        print('Samples are:')
        print(pred.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_w'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_Myhat'])

        # Sigma is rearranged with reshape(10, 2).T before comparison with the MATLAB array
        print('Sigma are:')
        print(pred.sigma.squeeze().reshape(10, 2).T)
        print('Matlab Sigma are:')
        print(matfile['pred_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_Syhat'].squeeze(),
                        pred.sigma.squeeze().reshape(10, 2).T))

        # Same prediction again, this time with addResidVar=True
        pred_arv = wPred([0.5, 0.5],
                         psamps,
                         model.num,
                         model.data,
                         addResidVar=True,
                         returnMuSigma=True)
        print('Add Residual Variance test')
        print('Samples are:')
        print(pred_arv.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_arv_w'].squeeze())

        print('Mu are:')
        print(pred_arv.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_arv_Myhat'])

        print('Sigma are:')
        print(pred_arv.sigma.squeeze().reshape(10, 2).T)
        print('Matlab Sigma are:')
        print(matfile['pred_arv_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_w'].squeeze(), pred_arv.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_Myhat'].squeeze(),
                        pred_arv.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_Syhat'].squeeze(),
                        pred_arv.sigma.squeeze().reshape(10, 2).T))

        print('Done.')
Example #25
0
    def test_multivariate_sim_and_obs_ragged(self):
        """
        Multivariate sim and obs SepiaData built from x and t, where each observation
        has its own number of output points (ragged obs).
        """
        m = 700  # number of simulated observations
        p = 3  # dimension of x (simulation inputs)
        ell_sim = 1000  # dimension of y output sim
        pu = 3  # number of PCs
        q = 2  # dimension of t (extra sim inputs)
        n = 5  # number of observed observations

        # One output length per observation
        ell_obs = np.random.randint(100, 600, n)

        def true_basis(ind):
            # Three basis rows evaluated on the index vector `ind`
            return np.vstack([
                0.5 * (np.sin(ind) + 1),
                np.square(-ind + 50) / 2500,
                ind / 100,
            ])

        # Simulations: log trend plus randomly weighted basis functions
        y_ind_sim = np.linspace(0, 100, ell_sim)
        K_true_sim = true_basis(y_ind_sim)
        sim_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, m))
        y_sim = np.transpose(
            np.log(1 + y_ind_sim)[:, None] + np.dot(K_true_sim.T, sim_weights))
        x_sim = 0.5 * np.random.uniform(-1, 3, (m, p))
        t = np.random.uniform(-10, 10, (m, q))

        # Ragged observation indices: jittered grids, clipped below at zero
        y_ind_obs = [
            np.linspace(0, 100, ell) + np.random.uniform(-3, 3, ell)
            for ell in ell_obs
        ]
        for ind in y_ind_obs:
            ind[ind < 0] = 0
        K_true_obs = [true_basis(ind) for ind in y_ind_obs]

        # Observations: same construction as the sims, one draw each, shifted by 10
        y_obs = []
        for ind, basis in zip(y_ind_obs, K_true_obs):
            obs_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, 1))
            y_obs.append(10 + np.squeeze(
                np.log(1 + ind)[:, None] + np.dot(basis.T, obs_weights)))
        x_obs = 0.5 * np.random.uniform(-1, 3, (n, p))

        d = SepiaData(x_sim=x_sim,
                      y_sim=y_sim,
                      t_sim=t,
                      y_ind_sim=y_ind_sim,
                      x_obs=x_obs,
                      y_obs=y_obs,
                      y_ind_obs=y_ind_obs)

        print('Testing multivariate sim and obs SepiaData...')
        print(d)
        self.assertTrue(d.obs_data is not None)
        self.assertTrue(not d.sim_only)
        self.assertTrue(not d.scalar_out)

        # Transformed sim inputs must span exactly [0, 1] in every column
        d.transform_xt()
        for trans in (d.sim_data.x_trans, d.sim_data.t_trans):
            self.assertTrue(np.all(np.min(trans, 0) == 0))
            self.assertTrue(np.all(np.max(trans, 0) == 1))

        # With centering and scaling off, standardization leaves y unchanged
        d.standardize_y(center=False, scale=False)
        sim = d.sim_data
        self.assertEqual(sim.orig_y_sd, 1)
        self.assertEqual(sim.orig_y_mean, 0)
        self.assertTrue(np.allclose(sim.y, sim.y_std))
        #self.assertEqual(d.obs_data.orig_y_sd, 1)
        #self.assertEqual(d.obs_data.orig_y_mean, 0)
        #self.assertTrue(np.allclose(d.obs_data.y, d.obs_data.y_std))

        # Columnwise standardization: mean near the log trend, unit spread
        d.standardize_y(scale='columnwise')
        sim = d.sim_data
        self.assertTrue(
            np.allclose(sim.orig_y_mean, np.log(1 + y_ind_sim), rtol=0.1, atol=0.5))
        self.assertTrue(np.allclose(np.mean(sim.y_std, 0), 0, rtol=0.1))
        self.assertTrue(np.allclose(np.std(sim.y_std, 0), 1, rtol=0.1))
        self.assertTrue(sim.y.shape == sim.y_std.shape)

        d.create_K_basis(3)
        self.assertTrue(d.sim_data.K.shape == (pu, ell_sim))
        d.create_D_basis()
        print(d)
Example #26
0
    def test_multivariate_sim_and_obs_no_x(self):
        """
        Multivariate sim and obs SepiaData built from t only; no x is passed, and the
        transformed x is expected to be the constant 0.5 for both sim and obs.
        """
        m = 700  # number of simulated observations
        ell_sim = 1000  # dimension of y output sim
        ell_obs = 258  # dimension of y output obs
        pu = 3  # number of PCs
        q = 2  # dimension of t (extra sim inputs)
        n = 5  # number of observed observations

        def true_basis(ind):
            # Three basis rows evaluated on the index vector `ind`
            return np.vstack([
                0.5 * (np.sin(ind) + 1),
                np.square(-ind + 50) / 2500,
                ind / 100,
            ])

        # Simulations: log trend plus randomly weighted basis functions
        y_ind_sim = np.linspace(0, 100, ell_sim)
        K_true_sim = true_basis(y_ind_sim)
        sim_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, m))
        y_sim = np.transpose(
            np.log(1 + y_ind_sim)[:, None] + np.dot(K_true_sim.T, sim_weights))
        t = np.random.uniform(-10, 10, (m, q))

        # Observations share one jittered index grid, clipped below at zero
        y_ind_obs = np.linspace(0, 100, ell_obs) + np.random.uniform(-3, 3, ell_obs)
        y_ind_obs[y_ind_obs < 0] = 0
        K_true_obs = true_basis(y_ind_obs)
        obs_weights = 2 * np.array([1, 0.5, 0.2])[:, None] * np.random.normal(0, 1, (pu, n))
        y_obs = 10 + np.transpose(
            np.log(1 + y_ind_obs)[:, None] + np.dot(K_true_obs.T, obs_weights))

        d = SepiaData(y_sim=y_sim,
                      t_sim=t,
                      y_ind_sim=y_ind_sim,
                      y_obs=y_obs,
                      y_ind_obs=y_ind_obs)

        print('Testing multivariate sim and obs SepiaData...')
        print(d)
        self.assertTrue(d.obs_data is not None)
        self.assertTrue(not d.sim_only)
        self.assertTrue(not d.scalar_out)

        # Dummy x is constant 0.5; transformed t spans exactly [0, 1]
        d.transform_xt()
        self.assertTrue(np.all(d.sim_data.x_trans == 0.5))
        self.assertTrue(np.all(d.obs_data.x_trans == 0.5))
        self.assertTrue(np.all(np.min(d.sim_data.t_trans, 0) == 0))
        self.assertTrue(np.all(np.max(d.sim_data.t_trans, 0) == 1))

        # With centering and scaling off, standardization leaves y unchanged
        d.standardize_y(center=False, scale=False)
        for part in (d.sim_data, d.obs_data):
            self.assertEqual(part.orig_y_sd, 1)
            self.assertEqual(part.orig_y_mean, 0)
            self.assertTrue(np.allclose(part.y, part.y_std))

        # Columnwise standardization: sim checks first, then obs
        d.standardize_y(scale='columnwise')
        sim = d.sim_data
        self.assertTrue(
            np.allclose(sim.orig_y_mean, np.log(1 + y_ind_sim), rtol=0.1, atol=0.5))
        self.assertTrue(np.allclose(np.mean(sim.y_std, 0), 0, rtol=0.1))
        self.assertTrue(np.allclose(np.std(sim.y_std, 0), 1, rtol=0.1))
        self.assertTrue(sim.y.shape == sim.y_std.shape)
        obs = d.obs_data
        self.assertTrue(
            np.allclose(obs.orig_y_mean, np.log(1 + y_ind_obs), rtol=0.1, atol=0.5))
        self.assertTrue(obs.y.shape == obs.y_std.shape)

        # Basis shapes follow the sim/obs output lengths
        d.create_K_basis(3)
        self.assertTrue(d.sim_data.K.shape == (pu, ell_sim))
        self.assertTrue(d.obs_data.K.shape == (pu, ell_obs))
        d.create_D_basis()
        self.assertTrue(d.obs_data.D.shape == (1, ell_obs))
        print(d)
Example #27
0
    def test_full_setup_LL_pred(self):
        """
        Builds one calibration model from x_sim/t_sim and one from the separable design
        (xt_sim_sep), then checks that both give the same log likelihood and the same
        'Sep'-mode prediction mean and sigma after a short MCMC run. Ends by printing
        prediction timings for both models.
        """

        def predict(model, samples, x_pred, mode='Sep'):
            # Full prediction that keeps mu/sigma so they can be compared afterwards
            return SepiaFullPrediction(mode=mode,
                                       model=model,
                                       samples=samples,
                                       storeMuSigma=True,
                                       x_pred=x_pred)

        # Set up and check calibration model
        x_obs = np.ones((3, 2)) * np.array([0.5, 0.75, 0.25]).reshape((-1, 1))
        y_obs = np.block([[-0.1, 0.1], [-0.2, 0.3], [0.1, 0]])

        # augment to also test more than scalar dimensions in x and t
        half_col = 0.5 * np.ones((self.x_sim.shape[0], 1))
        x_sim_cal = np.hstack((half_col, self.x_sim[:, :1]))
        t_sim_cal = self.x_sim[:, 1:]
        xt_sim_sep = [np.array(0.5).reshape(1, 1)] + self.x_sim_kron

        # Standardize the sim output by hand (column mean, sample sd)
        col_mean = np.mean(self.y_sim, axis=0).reshape(1, -1)
        col_sd = np.std(self.y_sim, axis=0, ddof=1).reshape(1, -1)
        y_sim_std = (self.y_sim - col_mean) / col_sd

        # Arguments common to both data objects
        shared = dict(y_sim=y_sim_std,
                      x_obs=x_obs,
                      y_obs=y_obs,
                      y_ind_sim=self.y_ind_sim,
                      y_ind_obs=self.y_ind_sim)

        # Regular design
        dc = SepiaData(x_sim=x_sim_cal, t_sim=t_sim_cal, **shared)
        dc.create_K_basis(K=np.eye(2))
        dc.create_D_basis(D_sim=np.eye(2), D_obs=np.eye(2))
        dc.transform_xt(x_notrans=True, t_notrans=True)
        dc.standardize_y(y_mean=0, y_sd=1)
        print(dc)
        cmod = SepiaModel(dc)

        print('Calibration model LL=%f' % compute_log_lik(cmod))

        # Separable design
        kdc = SepiaData(xt_sim_sep=xt_sim_sep, **shared)
        kdc.create_K_basis(K=np.eye(2))
        kdc.create_D_basis(D_sim=np.eye(2), D_obs=np.eye(2))
        kdc.transform_xt(x_notrans=True, t_notrans=True)
        kdc.standardize_y(y_mean=0, y_sd=1)
        print(kdc)
        kcmod = SepiaModel(kdc)

        print('Calibration Sep model LL=%f' % compute_log_lik(kcmod))

        self.assertAlmostEqual(compute_log_lik(cmod),
                               compute_log_lik(kcmod),
                               places=5)

        print(
            'Running MCMC on Calibration model in Sep pred mode, regular design'
        )
        np.random.seed(42)
        tic = time()
        cmod.do_mcmc(10)
        print('sampling time %f' % (time() - tic))
        csamp = cmod.get_samples(sampleset=[cmod.get_last_sample_ind()])
        cpred = predict(cmod, csamp, np.array([0.5, 0.5]).reshape((1, -1)))
        print(cpred.get_ysim())
        csm, css = cpred.get_mu_sigma()
        print(csm)
        print(css)

        print(
            'Running MCMC on Calibration model in Sep pred mode, separable design'
        )
        np.random.seed(42)
        tic = time()
        kcmod.do_mcmc(10)
        print('sampling time %f' % (time() - tic))
        kcsamp = kcmod.get_samples(sampleset=[kcmod.get_last_sample_ind()])
        kcpred = predict(kcmod, kcsamp, np.array([0.5, 0.5]).reshape((1, -1)))
        print(kcpred.get_ysim())
        kcsm, kcss = kcpred.get_mu_sigma()
        print(kcsm)
        print(kcss)

        # Both designs must give the same predictive mean and sigma
        mu_gap = np.max(abs(csm - kcsm))
        print('testing max difference which is %g' % mu_gap)
        self.assertAlmostEqual(0, np.max(abs(csm - kcsm)))

        sigma_gap = np.max(abs(css - kcss))
        print('testing max difference which is %g' % sigma_gap)
        self.assertAlmostEqual(0, np.max(abs(css - kcss)))

        ###### Timing for predictions

        test_x_pred = np.random.rand(10, 2)

        csamp = cmod.get_samples()
        tic = time()
        cpred0 = predict(cmod, csamp, test_x_pred, mode='notSep')
        print('predict time non-Sep in non-Sep mode %f' % (time() - tic))
        tic = time()
        cpred = predict(cmod, csamp, test_x_pred)
        print('predict time non-sep in Sep mode %f' % (time() - tic))

        kcsamp = kcmod.get_samples()
        tic = time()
        kcpred = predict(kcmod, kcsamp, test_x_pred)
        print('predict time Sep %f' % (time() - tic))
Example #28
0
    print('make sure matlab.engine installed')

# Script excerpt: build SepiaData and a model from arrays stored in `matfile`
# (`matfile`, `SepiaData` and `setup_model` are defined outside this excerpt).
y_sim = matfile['y'].T  # stored y is transposed before use
y_ind_sim = matfile['y_ind'].squeeze()
xt_sim = matfile['x']
y_obs = matfile['y_obs']
y_ind_obs = matfile['y_ind_obs'].squeeze()
x_obs = matfile['x_obs']
# Column 0 of the stored design is passed as x, column 1 as t (each kept as a column vector)
data = SepiaData(x_sim=xt_sim[:, 0][:, None],
                 t_sim=xt_sim[:, 1][:, None],
                 y_sim=y_sim,
                 y_ind_sim=y_ind_sim,
                 x_obs=x_obs,
                 y_obs=y_obs,
                 y_ind_obs=y_ind_obs)
# Preprocess: standardize y, transform inputs, K basis with 2 components,
# and the D basis taken from the stored 'Dobs' array (transposed)
data.standardize_y()
data.transform_xt()
data.create_K_basis(n_pc=2)
data.create_D_basis(D_obs=matfile['Dobs'].T)
print(data)

model = setup_model(data)

# Sample and burn-in counts come from the stored file
nsamp = int(matfile['nsamp'])
nburn = int(matfile['nburn'])


@timeit
def run_mcmc():
    """Run the module-level model for nburn + nsamp MCMC iterations (wrapped by timeit)."""
    total_iterations = nburn + nsamp
    model.do_mcmc(total_iterations)
Example #29
0
    def test_multivariate_sim_and_obs_setup(self):
        """
        Tests setup for multivariate sim and obs model with D

        The model is built twice: once from an untouched copy of the data and
        once after explicit transform/standardize/basis calls. The test checks
        that both give the same transformed values, then checks the model
        dimensions, the setup of every parameter, and the MCMC parameter list.
        """

        d = SepiaData(t_sim=self.data_dict['t_sim'],
                      y_sim=self.data_dict['y_sim'],
                      y_ind_sim=self.data_dict['y_ind_sim'],
                      y_obs=self.data_dict['y_obs'],
                      y_ind_obs=self.data_dict['y_ind_obs'])
        print(
            'Testing multivariate sim and obs SepiaModelSetup with discrep...',
            flush=True)
        print(d, flush=True)

        # Try it without doing standardization/transform/pc basis to be sure it doesn't break
        # (deep copy so the explicit calls below do not touch this model's data)
        model_notrans = setup_model(copy.deepcopy(d))

        # Do explicit transformation
        d.transform_xt()
        d.standardize_y()
        d.create_K_basis(n_pc=5)
        # Exercise each way of building D; the custom basis is the last one
        # requested (presumably the one the model ends up with -- see pv below).
        d.create_D_basis(type='constant')
        d.create_D_basis(type='linear')
        custom_D = np.vstack(
            [np.ones(d.obs_data.y.shape[1]), d.obs_data.y_ind])
        d.create_D_basis(D_obs=custom_D)
        model = setup_model(d)

        # Check that either way gives same transformation
        for attr in ('orig_y_mean', 'orig_y_sd', 'y_std', 't_trans'):
            self.assertTrue(
                np.allclose(getattr(model_notrans.data.sim_data, attr),
                            getattr(model.data.sim_data, attr)),
                msg='sim_data.%s differs between the two setups' % attr)
        for attr in ('orig_y_mean', 'orig_y_sd', 'y_std'):
            self.assertTrue(
                np.allclose(getattr(model_notrans.data.obs_data, attr),
                            getattr(model.data.obs_data, attr)),
                msg='obs_data.%s differs between the two setups' % attr)

        # Check model components are set up as expected
        # (expected sizes depend on the fixture data in self.data_dict)
        self.assertFalse(model.num.scalar_out)
        self.assertFalse(model.num.sim_only)
        self.assertEqual(model.num.m, 100)
        self.assertEqual(model.num.n, 1)
        self.assertEqual(model.num.p, 1)
        self.assertEqual(model.num.q, 3)
        self.assertEqual(model.num.pu, 5)
        self.assertEqual(model.num.pv, 2)
        # TODO compute projections so model.num.w, model.num.u and model.num.v
        # can be compared against the standardized sim/obs y values.

        # Check parameter setup: (name, val_shape, prior dist, MCMC step type)
        expected_params = [
            ('betaU', (model.num.q + model.num.p, model.num.pu),
             'Beta', 'BetaRho'),
            ('betaV', (1, 1), 'Beta', 'BetaRho'),
            ('lamUz', (1, model.num.pu), 'Gamma', 'PropMH'),
            ('lamVz', (1, 1), 'Gamma', 'PropMH'),
            ('lamWOs', (1, 1), 'Gamma', 'PropMH'),
            ('lamWs', (1, model.num.pu), 'Gamma', 'PropMH'),
            ('lamOs', (1, 1), 'Gamma', 'PropMH'),
            ('theta', (1, model.num.q), 'Normal', 'Uniform'),
        ]
        for name, val_shape, prior_dist, step_type in expected_params:
            param = getattr(model.params, name)
            self.assertEqual(param.val_shape, val_shape,
                             msg='%s val_shape' % name)
            self.assertEqual(param.prior.dist, prior_dist,
                             msg='%s prior dist' % name)
            self.assertEqual(param.mcmc.stepType, step_type,
                             msg='%s mcmc stepType' % name)

        # theta additionally carries its original range, expected to be [0, 1]
        theta = model.params.theta
        self.assertTrue(np.allclose(theta.orig_range[0], 0))
        self.assertTrue(np.allclose(theta.orig_range[1], 1))

        # The MCMC list must contain exactly the parameters checked above
        mcmc_list_names = {p.name for p in model.params.mcmcList}
        self.assertEqual(mcmc_list_names,
                         {name for name, _, _, _ in expected_params})