Ejemplo n.º 1
0
    def setUp(self, m=100, n=1, nt_sim=50, nt_obs=20, n_theta=3, n_basis=5, sig_n=0.1, seed=42):
        """
        Build the univariate and multivariate model lists used by the tests.

        Three data sets of each flavour are generated (every iteration passes the
        same ``seed`` to the generators), transformed, standardized and wrapped in
        ``SepiaModel`` instances stored on ``self.univ_model_list`` and
        ``self.multi_model_list``. ``self.hier_idx`` is set to a single row of zeros.
        """
        n_hier = 3
        self.hier_idx = np.array([[0, 0, 0]])
        # TODO: np.array([[1, 1, 1], [2, -1, 2]]) fails for multivariate models,
        # so it cannot be used for the univariate case for now either.

        univ_data_list, multi_data_list = [], []
        for _ in range(n_hier):
            # Generate the raw dictionaries first (multivariate, then univariate).
            multi_raw = generate_data.generate_multi_sim_and_obs(
                m=m, n=n, nt_sim=nt_sim, nt_obs=nt_obs,
                n_theta=n_theta, n_basis=n_basis,
                sig_n=sig_n, seed=seed,
            )
            univ_raw = generate_data.generate_univ_sim_and_obs(m=m, n=n, sig_n=sig_n, seed=seed)

            # Univariate data: only inputs/outputs, no output indices or bases.
            univ_data = SepiaData(
                t_sim=univ_raw['t_sim'],
                y_sim=univ_raw['y_sim'],
                y_obs=univ_raw['y_obs'],
            )
            univ_data.transform_xt()
            univ_data.standardize_y()
            univ_data_list.append(univ_data)

            # Multivariate data: needs output indices plus K and D bases.
            multi_data = SepiaData(
                t_sim=multi_raw['t_sim'],
                y_sim=multi_raw['y_sim'],
                y_ind_sim=multi_raw['y_ind_sim'],
                y_obs=multi_raw['y_obs'],
                y_ind_obs=multi_raw['y_ind_obs'],
            )
            multi_data.transform_xt()
            multi_data.standardize_y()
            # NOTE(review): 5 is hard-coded here rather than taken from n_basis -- confirm intent.
            multi_data.create_K_basis(5)
            multi_data.create_D_basis('constant')
            multi_data_list.append(multi_data)

        self.univ_model_list = [SepiaModel(data) for data in univ_data_list]
        self.multi_model_list = [SepiaModel(data) for data in multi_data_list]
Ejemplo n.º 2
0
    def test_univariate_sim_and_obs(self):
        """
        Check SepiaData with univariate simulation and observation data
        when both x and t inputs are supplied.
        """
        m = 700  # simulation runs
        p = 3    # columns of x (inputs shared by sim and obs)
        q = 2    # columns of t (inputs only the simulator has)
        n = 5    # observations

        # Draw order matters for reproducibility under a fixed global seed.
        x_sim = np.random.uniform(low=-1, high=3, size=(m, p))
        t = np.random.uniform(low=-10, high=10, size=(m, q))
        x_obs = np.random.uniform(low=-1.5, high=3.5, size=(n, p))
        y_sim = 5 * np.random.normal(loc=0, scale=1, size=m) + 2
        y_obs = 5 * np.random.normal(loc=0, scale=1, size=n) + 1
        d = SepiaData(x_sim=x_sim, t_sim=t, y_sim=y_sim, x_obs=x_obs, y_obs=y_obs)

        def assert_column_range(values, low, high):
            # Every column must hit exactly `low` and `high` after rescaling.
            self.assertTrue(np.all(np.min(values, 0) == low))
            self.assertTrue(np.all(np.max(values, 0) == high))

        def assert_untouched(part):
            # With centering and scaling disabled, y must pass through unchanged.
            self.assertEqual(part.orig_y_sd, 1)
            self.assertEqual(part.orig_y_mean, 0)
            self.assertTrue(np.allclose(part.y, part.y_std))

        print('Testing univariate sim and obs SepiaData...')
        print(d)
        self.assertIsNotNone(d.obs_data)
        self.assertFalse(d.sim_only)
        self.assertTrue(d.scalar_out)

        # Default transform range.
        d.transform_xt()
        assert_column_range(d.sim_data.x_trans, 0, 1)
        assert_column_range(d.sim_data.t_trans, 0, 1)

        # Explicit transform range.
        d.transform_xt(-10, 10)
        assert_column_range(d.sim_data.x_trans, -10, 10)
        assert_column_range(d.sim_data.t_trans, -10, 10)

        d.standardize_y(center=False, scale=False)
        assert_untouched(d.sim_data)
        assert_untouched(d.obs_data)

        d.standardize_y(scale='columnwise')
        sim_part = d.sim_data
        self.assertTrue(np.allclose(sim_part.orig_y_sd, 5, rtol=0.1))
        self.assertTrue(np.allclose(sim_part.orig_y_mean, 2, rtol=0.1))
        self.assertTrue(np.allclose(np.mean(sim_part.y_std, 0), 0, rtol=0.1))
        self.assertTrue(np.allclose(np.std(sim_part.y_std, 0), 1, rtol=0.1))
        self.assertEqual(sim_part.y.shape, sim_part.y_std.shape)
        # The observation side is checked against the same mean/sd targets as the sim side.
        obs_part = d.obs_data
        self.assertTrue(np.allclose(obs_part.orig_y_sd, 5, rtol=0.1))
        self.assertTrue(np.allclose(obs_part.orig_y_mean, 2, rtol=0.1))
        self.assertEqual(obs_part.y.shape, obs_part.y_std.shape)

        # Basis creation is only exercised here; nothing is asserted on the result.
        d.create_K_basis(10)
        d.create_D_basis()
Ejemplo n.º 3
0
    def test_multivariate_sim_only_x_only(self):
        """
        Check SepiaData setup for multivariate simulation-only data
        that has an x input and no t input.
        """
        m = 700     # simulation runs
        p = 3       # columns of x
        ell = 1000  # length of each y output
        pu = 3      # number of PCs

        y_ind = np.linspace(0, 100, ell)

        # Three basis rows stacked into a (pu, ell) matrix.
        sine_row = 0.5 * (np.sin(y_ind) + 1)
        quad_row = np.square(-y_ind + 50) / 2500
        ramp_row = y_ind / 100
        K_true = np.vstack([sine_row, quad_row, ramp_row])

        # Random weights per PC and run, scaled per PC; added to a log-shaped mean.
        pc_scale = 2 * np.array([1, 0.5, 0.2])[:, None]
        weights = pc_scale * np.random.normal(0, 1, (pu, m))
        y = (np.log(1 + y_ind)[:, None] + np.dot(K_true.T, weights)).T
        x = 0.5 * np.random.uniform(-1, 3, (m, p))
        d = SepiaData(x_sim=x, y_sim=y, t_sim=None, y_ind_sim=y_ind)

        def assert_column_range(values, low, high):
            # Every column must hit exactly `low` and `high` after rescaling.
            self.assertTrue(np.all(np.min(values, 0) == low))
            self.assertTrue(np.all(np.max(values, 0) == high))

        print('Testing multivariate sim-only SepiaData...')
        print(d)
        self.assertIsNone(d.obs_data)
        self.assertTrue(d.sim_only)
        self.assertFalse(d.scalar_out)

        d.transform_xt()
        assert_column_range(d.sim_data.x_trans, 0, 1)

        d.transform_xt(-10, 10)
        assert_column_range(d.sim_data.x_trans, -10, 10)

        # With centering and scaling disabled, y must pass through unchanged.
        d.standardize_y(center=False, scale=False)
        self.assertEqual(d.sim_data.orig_y_sd, 1)
        self.assertEqual(d.sim_data.orig_y_mean, 0)
        self.assertTrue(np.allclose(d.sim_data.y, d.sim_data.y_std))

        d.standardize_y(scale='columnwise')
        expected_mean = np.log(1 + y_ind)
        mean_matches = np.allclose(d.sim_data.orig_y_mean, expected_mean, rtol=0.1, atol=0.5)
        self.assertTrue(mean_matches)
        self.assertTrue(np.allclose(np.std(d.sim_data.y_std, 0), 1, rtol=0.1))
        self.assertTrue(np.allclose(np.mean(d.sim_data.y_std, 0), 0, rtol=0.1))
        self.assertEqual(d.sim_data.y.shape, d.sim_data.y_std.shape)

        d.create_K_basis(3)
        self.assertEqual(d.sim_data.K.shape, (pu, ell))
        d.create_D_basis()
        print(d)
Ejemplo n.º 4
0
    def test_predict_multi_sim_only(self):
        """
        Compare multivariate sim-only predictions against a MATLAB reference.

        Reference values are read from ``data/multi_sim_only_mcmc_test.mat`` next
        to this file. If that file is missing, it is generated first by running the
        MATLAB function ``multi_sim_only_mcmc_test`` through ``matlab.engine``.
        The Python model is then set up from the same data, MCMC is run for
        ``nburn + nsamp`` iterations, and ``wPred`` output (``w``, ``mu``,
        ``sigma``) is compared with the MATLAB values, both without and with
        ``addResidVar=True``.

        Raises:
            Exception: whatever the MATLAB run or the subsequent load raised when
                the reference file had to be generated and that failed. The error
                is re-raised after printing a hint, instead of surfacing later as
                an unrelated ``NameError`` on ``matfile``.
        """
        n_pc = 2
        seed = 42
        lamWOs_init = 50000.  # use 0 to use default lamWOs initial value
        # 1-based indexing for matlab; order is [betaU, lamUz, lamWs, lamWOs]
        list_to_sample = [1, 2, 3, 4]
        nsamp = 100
        nburn = 10

        # Locate the MATLAB reference data relative to this test file.
        script_path = os.path.dirname(os.path.realpath(__file__))
        mat_fn = '%s/data/multi_sim_only_mcmc_test.mat' % script_path
        if os.path.isfile(mat_fn):
            # if the matlab data is already in place, just load that
            print(
                'Found matfile, loading from multi_sim_only_mcmc_test.mat \n')
            matfile = scipy.io.loadmat(mat_fn)
        else:
            print('Generating matfile multi_sim_only_mcmc_test.mat \n')
            # Run matlab code, then open data from matlab
            try:
                eng = matlab.engine.start_matlab()
                try:
                    eng.cd(script_path)
                    eng.addpath('matlab/', nargout=0)
                    eng.multi_sim_only_mcmc_test(nsamp,
                                                 nburn,
                                                 list_to_sample,
                                                 seed,
                                                 lamWOs_init,
                                                 n_pc,
                                                 nargout=0)
                finally:
                    # Always shut the engine down, even if the MATLAB call failed.
                    eng.quit()
                matfile = scipy.io.loadmat(mat_fn)
            except Exception as e:
                print(e)
                print('make sure matlab.engine installed')
                # Without reference data nothing below can run; surface the real cause.
                raise

        y = matfile['y'].T
        y_ind = matfile['y_ind'].T
        x = matfile['x']
        # First column of the MATLAB input matrix is used as x, second as t.
        data = SepiaData(x_sim=x[:, 0][:, None],
                         t_sim=x[:, 1][:, None],
                         y_sim=y,
                         y_ind_sim=y_ind)
        data.standardize_y()
        data.transform_xt()
        data.create_K_basis(n_pc=n_pc)
        print(data)

        np.random.seed(int(seed))
        model = setup_model(data)
        if lamWOs_init > 0:
            model.params.lamWOs.val = np.array([[lamWOs_init]])
        # Restrict sampling to the parameters MATLAB sampled (convert 1-based to 0-based).
        model.params.mcmcList = [
            model.params.mcmcList[i - 1] for i in list_to_sample
        ]
        t_start = time()
        model.do_mcmc(nburn + nsamp)
        t_end = time()
        print('Python mcmc time %0.3g s' % (t_end - t_start))
        # loadmat returns arrays; reduce to a plain float before formatting so this
        # does not rely on implicit array-to-scalar conversion.
        print('Matlab mcmc time %0.3g s' %
              float(np.squeeze(matfile['mcmc_time'])))

        # Re-seed so the prediction draws are reproducible.
        np.random.seed(seed)
        psamps = model.get_samples(0, sampleset=[0, 1, 2, 3, 4], flat=True)
        pred = wPred([0.5, 0.5],
                     psamps,
                     model.num,
                     model.data,
                     returnMuSigma=True)
        print('Samples are:')
        print(pred.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_w'].squeeze())

        print('Mu are:')
        print(pred.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_Myhat'])

        # NOTE(review): the (10, 2) reshape is hard-coded to match the MATLAB
        # layout of pred_Syhat -- confirm if sampleset or n_pc ever changes.
        print('Sigma are:')
        print(pred.sigma.squeeze().reshape(10, 2).T)
        print('Matlab Sigma are:')
        print(matfile['pred_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_w'].squeeze(), pred.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_Myhat'].squeeze(), pred.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_Syhat'].squeeze(),
                        pred.sigma.squeeze().reshape(10, 2).T))

        # Same comparison with the addResidVar option switched on.
        pred_arv = wPred([0.5, 0.5],
                         psamps,
                         model.num,
                         model.data,
                         addResidVar=True,
                         returnMuSigma=True)
        print('Add Residual Variance test')
        print('Samples are:')
        print(pred_arv.w.squeeze())
        print('Matlab Samples are:')
        print(matfile['pred_arv_w'].squeeze())

        print('Mu are:')
        print(pred_arv.mu.squeeze())
        print('Matlab Mu are:')
        print(matfile['pred_arv_Myhat'])

        print('Sigma are:')
        print(pred_arv.sigma.squeeze().reshape(10, 2).T)
        print('Matlab Sigma are:')
        print(matfile['pred_arv_Syhat'].squeeze())

        print('Checking predicted realizations...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_w'].squeeze(), pred_arv.w.squeeze()))
        print('Checking predicted means...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_Myhat'].squeeze(),
                        pred_arv.mu.squeeze()))
        print('Checking predicted sigmas...')
        self.assertTrue(
            np.allclose(matfile['pred_arv_Syhat'].squeeze(),
                        pred_arv.sigma.squeeze().reshape(10, 2).T))

        print('Done.')