def test_sample(self):
    """Check eval_sample() invariants, output layout, seeding, and flags.

    Exercises, in order: the shared invariant helpers on ``self.inv_test``,
    the error raised when ``n`` is omitted, the shape and columns of a
    single-draw result, agreement between the function and pipe forms
    under a fixed seed, and the columns returned with ``skip=True`` and
    ``append=False``.
    """
    # Invariant checks
    self.inv_test.md_arg(gr.eval_sample, df_arg="df_det")
    self.inv_test.df_arg(
        gr.eval_sample,
        df_arg="df_det",
        shortcut=True,
        acc_none="var_det",
    )

    # No `n` provided
    with self.assertRaises(ValueError):
        gr.eval_sample(self.md, df_det="nom")

    # A single draw carries every variable and every output
    df_min = gr.eval_sample(self.md, n=1, df_det="nom")
    self.assertEqual(df_min.shape, (1, self.md.n_var + self.md.n_out))
    self.assertEqual(set(df_min.columns), set(self.md.var + self.md.out))

    # Seed fixes runs
    df_seeded = gr.eval_sample(self.md, n=10, df_det="nom", seed=101)
    df_piped = self.md >> gr.ev_sample(df_det="nom", n=10, seed=101)
    self.assertTrue(df_seeded.equals(df_piped))

    # skip=True: only the variable columns are expected
    df_skip = gr.eval_sample(self.md, n=1, df_det="nom", skip=True)
    self.assertEqual(set(df_skip.columns), set(self.md.var))

    # append=False: only the output columns are expected
    df_noappend = gr.eval_sample(self.md, n=1, df_det="nom", append=False)
    self.assertEqual(set(df_noappend.columns), set(self.md.out))
def test_ev_sample(self):
    """Verify the pipe form ev_sample() reproduces eval_sample().

    Both calls use the same model, sample count, seed, and ``df_det``
    setting; the two results are compared with gr.df_equal().
    """
    settings = dict(n=1, seed=101, df_det="nom")

    # Direct call first, then the pipe-enabled equivalent
    df_direct = gr.eval_sample(self.model_default, **settings)
    df_piped = self.model_default >> gr.ev_sample(**settings)

    frames_match = gr.df_equal(df_direct, df_piped)
    self.assertTrue(frames_match)
def test_eval_input_subsets(self):
    """Test eval_pnd() when `signs` names a subset of the model outputs.

    A model is fit with outputs y1, y2, y3, but ev_pnd() is called with
    signs for y1 and y2 only. The result is expected to have one row per
    test observation and two more columns than the test data.
    """
    # Model to make dataset
    md_true = make_pareto_random(twoDim=False)

    # Create dataframe
    df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

    # Select training set
    df_train = df_data >> gr.tf_sample(n=10)

    # Select test set: anti-join against the training rows on x1, x2
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Create fitted model
    md_fit = df_train >> ft_gp(
        var=["x1", "x2", "x3"],
        out=["y1", "y2", "y3"],
    )

    # Call eval_pnd w/ only "y1" and "y2"
    df_pnd = md_fit >> gr.ev_pnd(
        df_train,
        df_test,
        signs={"y1": 1, "y2": 1},
        seed=101,
    )
    # Open question from the original author:
    # how to imply x1 and x2 from y1 and y2?

    # Test for correctness by shape
    self.assertEqual(len(df_pnd), df_test.shape[0])

    # Test for correctness by # of outputs
    self.assertEqual(
        len(df_pnd.columns.values),
        len(df_test.columns.values) + 2,
    )
def test_eval_append(self):
    """Test the `append` parameter of eval_pnd().

    With ``append=False`` and signs given for y1, y2, y3, the result is
    expected to have one row per test observation and exactly two
    columns.
    """
    # Model to make dataset
    md_true = make_pareto_random(twoDim=False)

    # Create dataframe
    df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

    # Select training set
    df_train = df_data >> gr.tf_sample(n=10)

    # Select test set: anti-join against the training rows on x1, x2
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Create fitted model
    md_fit = df_train >> ft_gp(
        var=["x1", "x2", "x3"],
        out=["y1", "y2", "y3"],
    )

    # Call eval_pnd without appending to the test data
    df_pnd = md_fit >> gr.ev_pnd(
        df_train,
        df_test,
        signs={"y1": 1, "y2": 1, "y3": 1},
        seed=101,
        append=False,
    )

    # Test for correctness by shape
    self.assertEqual(len(df_pnd), df_test.shape[0])

    # Test for correctness by # of outputs
    self.assertEqual(len(df_pnd.columns.values), 2)
def test_eval_faulty_inputs(self):
    """Test that eval_pnd() raises ValueError for faulty `signs` input.

    The model is fit with outputs y1 and y2; `signs` is then given the
    key "y", which is not among them.
    """
    # Model to make dataset
    md_true = make_pareto_random()

    # Create dataframe
    df_data = md_true >> gr.ev_sample(n=2000, seed=101, df_det="nom")

    # Select training set
    df_train = df_data >> gr.tf_sample(n=10)

    # Select test set: anti-join against the training rows on x1, x2
    df_test = (
        df_data
        >> gr.tf_anti_join(
            df_train,
            by=["x1", "x2"],
        )
        >> gr.tf_sample(n=200)
    )

    # Create fitted model
    md_fit = df_train >> ft_gp(
        var=["x1", "x2"],
        out=["y1", "y2"],
    )

    # Call eval_pnd; the result is discarded because the call must raise
    with self.assertRaises(ValueError):
        _ = md_fit >> gr.ev_pnd(
            df_train,
            df_test,
            signs={"y": 1, "y2": 1},
            seed=101,
        )
def test_sample(self):
    """Check eval_sample() error handling, output layout, and flags.

    Exercises, in order: the error raised when ``n`` is omitted, the
    shape and columns of a single-draw result, agreement between the
    function and pipe forms under a fixed seed, and the columns returned
    with ``skip=True`` and ``append=False``.
    """
    # No `n` provided
    with self.assertRaises(ValueError):
        gr.eval_sample(self.md, df_det="nom")

    # A single draw carries every variable and every output
    df_min = gr.eval_sample(self.md, n=1, df_det="nom")
    self.assertEqual(df_min.shape, (1, self.md.n_var + self.md.n_out))
    self.assertEqual(set(df_min.columns), set(self.md.var + self.md.out))

    # Seed fixes runs
    df_seeded = gr.eval_sample(self.md, n=10, df_det="nom", seed=101)
    df_piped = self.md >> gr.ev_sample(df_det="nom", n=10, seed=101)
    self.assertTrue(df_seeded.equals(df_piped))

    # skip=True: only the variable columns are expected
    df_skip = gr.eval_sample(self.md, n=1, df_det="nom", skip=True)
    self.assertEqual(set(df_skip.columns), set(self.md.var))

    # append=False: only the output columns are expected
    df_noappend = gr.eval_sample(self.md, n=1, df_det="nom", append=False)
    self.assertEqual(set(df_noappend.columns), set(self.md.out))
def test_tran_reweight(self):
    """Test the functionality of tran_reweight().

    Correctness: a sample from ``self.md`` is reweighted toward a model
    whose ``x`` marginal is replaced, and the weighted mean of ``y`` is
    checked to lie within two standard errors of zero, with a smaller
    standard error than the unweighted sample.

    Invariants: ValueError is expected for data missing an input, for a
    mismatched new model, and for data that already has a ``weight``
    column.
    """
    ## Correctness
    # Choose scale based on Owen (2013) Exercise 9.7
    md_new = self.md >> gr.cp_marginals(
        x=dict(dist="norm", loc=0, scale=sqrt(4 / 5))
    )
    df_base = self.md >> gr.ev_sample(n=500, df_det="nom", seed=101)
    df_summary = (
        df_base
        >> gr.tf_reweight(md_base=self.md, md_new=md_new)
        >> gr.tf_summarize(
            mu=gr.mean(DF.y * DF.weight),
            se=gr.sd(DF.y * DF.weight) / gr.sqrt(gr.n(DF.weight)),
            se_orig=gr.sd(DF.y) / gr.sqrt(gr.n(DF.weight)),
        )
    )
    mu = df_summary.mu[0]
    se = df_summary.se[0]
    se_orig = df_summary.se_orig[0]

    # Zero must lie strictly inside the interval mu +/- 2 * se
    self.assertLess(mu - se * 2, 0)
    self.assertLess(0, mu + se * 2)

    ## Optimized IS should be more precise than ordinary monte carlo
    # assertLess reports both standard errors if this fails
    self.assertLess(se, se_orig)

    ## Invariants
    # Missing input in data
    with self.assertRaises(ValueError):
        gr.tran_reweight(df_base[["y"]], md_base=self.md, md_new=self.md)

    # Input mismatch
    with self.assertRaises(ValueError):
        gr.tran_reweight(df_base, md_base=self.md, md_new=gr.Model())

    # Weights collision
    with self.assertRaises(ValueError):
        gr.tran_reweight(
            df_base >> gr.tf_mutate(weight=0),
            md_base=self.md,
            md_new=self.md,
        )
def test_nls(self):
    """Test fit_nls() / ft_nls() across several fitting scenarios.

    Scenarios, in order:
      1. Fit a two-parameter exponential model to five noisy points.
      2. An unidentifiable model is expected to emit a RuntimeWarning.
      3. The true parameters must fall inside a wide (alpha = 1e-3)
         quantile interval of the fitted parameter marginals.
      4. A model with a fixed parameter (equal bounds) must still be
         fit and evaluated.
      5. A trajectory model is fit with method="SLSQP" and evaluated.
      6. The `out` argument selects which outputs are fit.
    """
    ## Ground-truth model
    c_true = 2
    a_true = 1
    md_true = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
            var=["x", "epsilon"],
            out=["y"],
        )
        >> gr.cp_marginals(epsilon={"dist": "norm", "loc": 0, "scale": 0.5})
        >> gr.cp_copula_independence()
    )
    df_data = md_true >> gr.ev_sample(
        n=5,
        seed=101,
        df_det=gr.df_make(x=[0, 1, 2, 3, 4]),
    )

    ## Model to fit
    md_param = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0])
    )

    ## Fit the model
    md_fit = df_data >> gr.ft_nls(
        md=md_param,
        verbose=False,
        uq_method="linpool",
    )

    ## Unidentifiable model throws warning
    # -------------------------
    # `a` and `z` enter the function only through the ratio a / z
    md_unidet = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
            var=["x", "c", "a", "z"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1])
    )
    with self.assertWarns(RuntimeWarning):
        gr.fit_nls(
            df_data,
            md=md_unidet,
            uq_method="linpool",
        )

    ## True parameters in wide confidence region
    # -------------------------
    alpha = 1e-3
    marginal_c = md_fit.density.marginals["c"]
    self.assertLessEqual(marginal_c.q(alpha / 2), c_true)
    self.assertLessEqual(c_true, marginal_c.q(1 - alpha / 2))

    marginal_a = md_fit.density.marginals["a"]
    self.assertLessEqual(marginal_a.q(alpha / 2), a_true)
    self.assertLessEqual(a_true, marginal_a.q(1 - alpha / 2))

    ## Model with fixed parameter
    # -------------------------
    md_fixed = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[1, 1])
    )
    md_fit_fixed = df_data >> gr.ft_nls(
        md=md_fixed,
        verbose=False,
        uq_method="linpool",
    )

    # Test that fixed model can evaluate successfully
    gr.eval_sample(md_fit_fixed, n=1, df_det="nom")

    ## Trajectory model
    # -------------------------
    md_base = models.make_trajectory_linear()
    md_fit_traj = data.df_trajectory_windowed >> gr.ft_nls(
        md=md_base,
        method="SLSQP",
        tol=1e-3,
    )
    # Smoke test only: the evaluation must run; its result is discarded
    _ = md_fit_traj >> gr.ev_nominal(df_det="nom")

    ## Select output for fitting
    # -------------------------
    # Split model has inconsistent "true" parameter value
    md_split = (
        gr.Model("Split")
        >> gr.cp_vec_function(
            fun=lambda df: gr.df_make(
                f=1 * df.c * df.x,
                g=2 * df.c * df.x,
            ),
            var=["c", "x"],
            out=["f", "g"],
        )
        >> gr.cp_bounds(
            x=(-1, +1),
            c=(-1, +1),
        )
    )
    df_split = gr.df_make(x=gr.linspace(-1, +1, 100)) >> gr.tf_mutate(
        f=X.x,
        g=X.x,
    )

    def fit_and_score(out):
        """Fit md_split to the outputs in `out`; return MSE of f and g."""
        return (
            df_split
            >> gr.ft_nls(md_split, out=out)
            >> gr.ev_df(df_split >> gr.tf_rename(f_t=X.f, g_t=X.g))
            >> gr.tf_summarize(
                mse_f=gr.mse(X.f, X.f_t),
                mse_g=gr.mse(X.g, X.g_t),
            )
        )

    # Fitting both outputs: cannot achieve mse ~= 0
    df_both = fit_and_score(["f", "g"])
    self.assertGreater(df_both.mse_f[0], 0)
    self.assertGreater(df_both.mse_g[0], 0)

    # Fitting "f" only
    df_f = fit_and_score(["f"])
    self.assertLess(df_f.mse_f[0], 1e-16)
    self.assertGreater(df_f.mse_g[0], 0)

    # Fitting "g" only
    df_g = fit_and_score(["g"])
    self.assertGreater(df_g.mse_f[0], 0)
    self.assertLess(df_g.mse_g[0], 1e-16)
def test_nls(self):
    """Test fit_nls() / ft_nls() across several fitting scenarios.

    Scenarios, in order:
      1. Fit a two-parameter exponential model to five noisy points.
      2. An unidentifiable model is expected to emit a RuntimeWarning.
      3. The true parameters must fall inside a wide (alpha = 1e-3)
         quantile interval of the fitted parameter marginals.
      4. A model with a fixed parameter (equal bounds) must still be
         fit and evaluated.
      5. A trajectory model is fit with method="SLSQP" and evaluated.
    """
    ## Ground-truth model
    c_true = 2
    a_true = 1
    md_true = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
            var=["x", "epsilon"],
            out=["y"],
        )
        >> gr.cp_marginals(epsilon={"dist": "norm", "loc": 0, "scale": 0.5})
        >> gr.cp_copula_independence()
    )
    df_data = md_true >> gr.ev_sample(
        n=5,
        seed=101,
        df_det=gr.df_make(x=[0, 1, 2, 3, 4]),
    )

    ## Model to fit
    md_param = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0])
    )

    ## Fit the model
    md_fit = df_data >> gr.ft_nls(
        md=md_param,
        verbose=False,
        uq_method="linpool",
    )

    ## Unidentifiable model throws warning
    # -------------------------
    # `a` and `z` enter the function only through the ratio a / z
    md_unidet = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
            var=["x", "c", "a", "z"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1])
    )
    with self.assertWarns(RuntimeWarning):
        gr.fit_nls(
            df_data,
            md=md_unidet,
            uq_method="linpool",
        )

    ## True parameters in wide confidence region
    # -------------------------
    alpha = 1e-3
    marginal_c = md_fit.density.marginals["c"]
    self.assertLessEqual(marginal_c.q(alpha / 2), c_true)
    self.assertLessEqual(c_true, marginal_c.q(1 - alpha / 2))

    marginal_a = md_fit.density.marginals["a"]
    self.assertLessEqual(marginal_a.q(alpha / 2), a_true)
    self.assertLessEqual(a_true, marginal_a.q(1 - alpha / 2))

    ## Model with fixed parameter
    # -------------------------
    md_fixed = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"],
        )
        >> gr.cp_bounds(c=[0, 4], a=[1, 1])
    )
    md_fit_fixed = df_data >> gr.ft_nls(
        md=md_fixed,
        verbose=False,
        uq_method="linpool",
    )

    # Test that fixed model can evaluate successfully
    gr.eval_sample(md_fit_fixed, n=1, df_det="nom")

    ## Trajectory model
    # -------------------------
    md_base = models.make_trajectory_linear()
    md_fit_traj = data.df_trajectory_windowed >> gr.ft_nls(
        md=md_base,
        method="SLSQP",
        tol=1e-3,
    )
    # Smoke test only: the evaluation must run; its result is discarded
    _ = md_fit_traj >> gr.ev_nominal(df_det="nom")