def setUp(self):
    """Create the smooth, tree, and cluster fixtures used by the tests."""
    # Smooth fixture: outputs are y = x and z = x + 1
    smooth = gr.Model()
    smooth = smooth >> gr.cp_function(
        fun=lambda x: [x, x + 1],
        var=["x"],
        out=["y", "z"],
    )
    smooth = smooth >> gr.cp_marginals(
        x={"dist": "uniform", "loc": 0, "scale": 2}
    )
    smooth = smooth >> gr.cp_copula_independence()
    self.md_smooth = smooth

    smooth_inputs = pd.DataFrame({"x": [0, 1, 2]})
    self.df_smooth = self.md_smooth >> gr.ev_df(df=smooth_inputs)

    # Tree fixture: y is constant and z is the threshold indicator x < 5
    tree = gr.Model()
    tree = tree >> gr.cp_function(
        fun=lambda x: [0, x < 5],
        var=["x"],
        out=["y", "z"],
    )
    tree = tree >> gr.cp_marginals(
        x={"dist": "uniform", "loc": 0, "scale": 2}
    )
    tree = tree >> gr.cp_copula_independence()
    self.md_tree = tree

    # Eight evenly spaced inputs from 0 to 10, so both sides of x = 5 occur
    tree_inputs = pd.DataFrame({"x": np.linspace(0, 10, num=8)})
    self.df_tree = self.md_tree >> gr.ev_df(df=tree_inputs)

    # Cluster fixture: two groups of four points, labeled by column "c"
    self.df_cluster = pd.DataFrame(
        {
            "x": [0.1, 0.2, 0.3, 0.4, 1.1, 1.2, 1.3, 1.4],
            "y": [0.3, 0.2, 0.1, 0.0, 1.3, 1.2, 1.1, 1.0],
            "c": [0, 0, 0, 0, 1, 1, 1, 1],
        }
    )
def test_ev_df(self):
    """Check that piped ev_df() agrees with a direct eval_df() call."""
    direct = gr.eval_df(self.model_default, df=self.df_test)
    piped = self.model_default >> gr.ev_df(df=self.df_test)

    self.assertTrue(gr.df_equal(direct, piped))
def test_drop_out(self):
    """Check that an input column named like a model output is replaced."""
    model = gr.Model()
    model = model >> gr.cp_function(lambda x: x[0] + 1, var=1, out=1)

    # The input frame already carries a "y0" column; the expected result
    # holds the computed values (x0 + 1) in that column instead.
    given = gr.df_make(x0=[0, 1, 2], y0=[0, 1, 2])
    expected = gr.df_make(x0=[0, 1, 2], y0=[1, 2, 3])

    observed = model >> gr.ev_df(df=given)

    self.assertTrue(gr.df_equal(observed, expected, close=True))
def test_nls(self):
    """Exercise ev_nls() and ft_nls() on a feature model and a constant model."""
    # Keyword set shared by every approximate frame comparison below
    loose = {
        "check_exact": False,
        "check_dtype": False,
        "check_column_type": False,
    }

    # Model with a feature (x1), a free parameter (x0), and a pinned x2
    feature_model = gr.Model()
    feature_model = feature_model >> gr.cp_function(
        fun=lambda x: x[0] * x[1] + x[2],
        var=3,
        out=1,
    )
    feature_model = feature_model >> gr.cp_bounds(x0=[-1, +1], x2=[0, 0])
    feature_model = feature_model >> gr.cp_marginals(
        x1={"dist": "norm", "loc": 0, "scale": 1}
    )

    # Model whose single output is just its single bounded input
    constant_model = gr.Model()
    constant_model = constant_model >> gr.cp_function(
        fun=lambda x: x[0], var=1, out=1
    )
    constant_model = constant_model >> gr.cp_bounds(x0=(-1, +1))

    # Generate data at x0 = 0.1; only the feature and the response are kept
    generating_inputs = gr.df_make(x0=0.1, x1=[-1, -0.5, +0, +0.5, +1], x2=0)
    response = feature_model >> gr.ev_df(df=generating_inputs)
    observations = response[["x1", "y0"]]

    # Fitting the feature model should recover the generating x0
    expected_feature = gr.df_make(x0=0.1)
    fitted = feature_model >> gr.ev_nls(df_data=observations, append=False)
    pd.testing.assert_frame_equal(fitted, expected_feature, **loose)

    # The fitting synonym returns a model whose variables are x1 and x2
    fitted_model = observations >> gr.ft_nls(md=feature_model, verbose=False)
    self.assertTrue(set(fitted_model.var) == {"x1", "x2"})

    # Constant model fitted to data symmetric about zero
    expected_constant = gr.df_make(x0=0)
    fitted = constant_model >> gr.ev_nls(df_data=gr.df_make(y0=[-1, 0, +1]))
    pd.testing.assert_frame_equal(fitted, expected_constant, **loose)
def test_comp_function(self):
    """Check comp_function() naming, invariants, and the vectorized builder."""
    once = gr.comp_function(self.md, fun=lambda x: x, var=1, out=1)
    twice = gr.comp_function(once, fun=lambda x: x, var=1, out=1)
    # Built from self.md again: the compositions above must not have
    # modified it, otherwise the names asserted below would differ.
    named = gr.comp_function(
        self.md,
        fun=lambda x: [x, 2 * x],
        var=["foo"],
        out=["bar1", "bar2"],
        name="test",
    )

    # Default variable and output names
    self.assertEqual(once.var, ["x0"])
    self.assertEqual(once.out, ["y0"])
    # A second composition advances the default-name counter
    self.assertEqual(set(twice.var), {"x0", "x1"})
    self.assertEqual(set(twice.out), {"y0", "y1"})
    # Explicit output names are kept (this also guards against copy issues)
    self.assertEqual(set(named.out), {"bar1", "bar2"})
    # Explicit function name is kept
    self.assertEqual(named.functions[0].name, "test")

    # Invariant checks: each incomplete signature must be rejected
    incomplete_signatures = [
        {"fun": None, "var": ["foo"], "out": ["bar"]},  # Missing function
        {"fun": lambda x: x, "var": None, "out": ["bar"]},  # Missing var
        {"fun": lambda x: x, "var": ["foo"], "out": None},  # Missing out
    ]
    for signature in incomplete_signatures:
        with self.assertRaises(ValueError):
            gr.comp_function(self.md, **signature)

    with self.assertRaises(ValueError):
        # Intersection of var / out names
        self.md >> gr.cp_function(lambda x: x, var=["x"], out=["x"], name="f0")

    with self.assertRaises(ValueError):
        # Non-unique function names
        first = self.md >> gr.cp_function(lambda x: x, var=1, out=1, name="f0")
        first >> gr.cp_function(lambda x: x, var=1, out=1, name="f0")

    # DAG invariant checks
    with self.assertRaises(ValueError):
        # Cycle by input
        upstream = self.md >> gr.cp_function(
            fun=lambda x: x[0], var=["y0"], out=1
        )
        upstream >> gr.cp_function(fun=lambda x: x[0], var=1, out=["y0"])

    with self.assertRaises(ValueError):
        # Non-unique output
        upstream = self.md >> gr.cp_function(
            fun=lambda x: x[0], var=1, out=["y0"]
        )
        upstream >> gr.cp_function(fun=lambda x: x[0], var=1, out=["y0"])

    # Vectorized builder: the function receives and returns a DataFrame
    vectorized = gr.comp_vec_function(
        self.md,
        fun=lambda df: df.assign(y0=df.x0),
        var=1,
        out=1,
    )
    expected = gr.df_make(x0=0, y0=0)
    observed = vectorized >> gr.ev_df(df=gr.df_make(x0=0))
    self.assertTrue(gr.df_equal(expected, observed))
def test_nls(self):
    """Exercise ev_nls() / eval_nls() / ft_nls(), including restarts and initial guesses."""
    # Keyword set shared by every approximate frame comparison below
    loose = {
        "check_exact": False,
        "check_dtype": False,
        "check_column_type": False,
    }

    # Model with a feature (x1), a free parameter (x0), and a pinned x2
    feature_model = gr.Model()
    feature_model = feature_model >> gr.cp_function(
        fun=lambda x: x[0] * x[1] + x[2],
        var=3,
        out=1,
    )
    feature_model = feature_model >> gr.cp_bounds(x0=[-1, +1], x2=[0, 0])
    feature_model = feature_model >> gr.cp_marginals(
        x1={"dist": "norm", "loc": 0, "scale": 1}
    )

    # Model whose single output is just its single bounded input
    constant_model = gr.Model()
    constant_model = constant_model >> gr.cp_function(
        fun=lambda x: x[0], var=1, out=1
    )
    constant_model = constant_model >> gr.cp_bounds(x0=(-1, +1))

    # Generate data at x0 = 0.1; only the feature and the response are kept
    generating_inputs = gr.df_make(x0=0.1, x1=[-1, -0.5, +0, +0.5, +1], x2=0)
    response = feature_model >> gr.ev_df(df=generating_inputs)
    observations = response[["x1", "y0"]]

    # Fitting the feature model should recover the generating x0
    expected_feature = gr.df_make(x0=0.1)
    fitted = feature_model >> gr.ev_nls(df_data=observations, append=False)
    pd.testing.assert_frame_equal(fitted, expected_feature, **loose)

    # The fitting synonym returns a model whose variables are x1 and x2
    fitted_model = observations >> gr.ft_nls(md=feature_model, verbose=False)
    self.assertTrue(set(fitted_model.var) == {"x1", "x2"})

    # Constant model fitted to data symmetric about zero
    expected_constant = gr.df_make(x0=0)
    fitted = constant_model >> gr.ev_nls(df_data=gr.df_make(y0=[-1, 0, +1]))
    pd.testing.assert_frame_equal(fitted, expected_constant, **loose)

    # Multiple restarts: one result row per restart
    restarted = gr.eval_nls(feature_model, df_data=observations, n_restart=2)
    self.assertTrue(restarted.shape[0] == 2)

    # A user-specified initial guess still reaches the same fit
    seeded = gr.eval_nls(
        feature_model,
        df_data=observations,
        df_init=gr.df_make(x0=0.5),
        append=False,
    )
    pd.testing.assert_frame_equal(seeded, expected_feature, **loose)

    # An initial guess with the wrong column must be rejected
    with self.assertRaises(ValueError):
        gr.eval_nls(
            feature_model,
            df_data=observations,
            df_init=gr.df_make(foo=0.5),
        )
def test_nls(self):
    """Exercise ft_nls() / fit_nls().

    Covers uncertainty quantification, the warning for an unidentifiable
    model, a fixed parameter, a trajectory model, and output selection.
    """
    ## Ground-truth model
    true_c = 2
    true_a = 1
    truth = gr.Model()
    truth = truth >> gr.cp_function(
        fun=lambda x: true_a * np.exp(x[0] * true_c) + x[1],
        var=["x", "epsilon"],
        out=["y"],
    )
    truth = truth >> gr.cp_marginals(
        epsilon={"dist": "norm", "loc": 0, "scale": 0.5}
    )
    truth = truth >> gr.cp_copula_independence()

    observations = truth >> gr.ev_sample(
        n=5,
        seed=101,
        df_det=gr.df_make(x=[0, 1, 2, 3, 4]),
    )

    ## Model to fit
    parametric = gr.Model()
    parametric = parametric >> gr.cp_function(
        fun=lambda x: x[2] * np.exp(x[0] * x[1]),
        var=["x", "c", "a"],
        out=["y"],
    )
    parametric = parametric >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0])

    ## Fit the model
    fitted = observations >> gr.ft_nls(
        md=parametric,
        verbose=False,
        uq_method="linpool",
    )

    ## Unidentifiable model throws warning
    # -------------------------
    # "a" and "z" only enter through the ratio a / z
    unidentifiable = gr.Model()
    unidentifiable = unidentifiable >> gr.cp_function(
        fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
        var=["x", "c", "a", "z"],
        out=["y"],
    )
    unidentifiable = unidentifiable >> gr.cp_bounds(
        c=[0, 4], a=[0.1, 2.0], z=[0, 1]
    )
    with self.assertWarns(RuntimeWarning):
        gr.fit_nls(
            observations,
            md=unidentifiable,
            uq_method="linpool",
        )

    ## True parameters in wide confidence region
    # -------------------------
    alpha = 1e-3
    marginal_c = fitted.density.marginals["c"]
    self.assertTrue(
        marginal_c.q(alpha / 2) <= true_c <= marginal_c.q(1 - alpha / 2)
    )
    marginal_a = fitted.density.marginals["a"]
    self.assertTrue(
        marginal_a.q(alpha / 2) <= true_a <= marginal_a.q(1 - alpha / 2)
    )

    ## Model with fixed parameter
    # -------------------------
    # Equal lower and upper bounds pin "a" at 1
    pinned = gr.Model()
    pinned = pinned >> gr.cp_function(
        fun=lambda x: x[2] * np.exp(x[0] * x[1]),
        var=["x", "c", "a"],
        out=["y"],
    )
    pinned = pinned >> gr.cp_bounds(c=[0, 4], a=[1, 1])
    fitted_pinned = observations >> gr.ft_nls(
        md=pinned,
        verbose=False,
        uq_method="linpool",
    )
    # Test that fixed model can evaluate successfully
    gr.eval_sample(fitted_pinned, n=1, df_det="nom")

    ## Trajectory model
    # -------------------------
    trajectory = models.make_trajectory_linear()
    fitted = data.df_trajectory_windowed >> gr.ft_nls(
        md=trajectory,
        method="SLSQP",
        tol=1e-3,
    )
    # Result is unused; the evaluation only has to complete without raising
    df_tmp = fitted >> gr.ev_nominal(df_det="nom")

    ## Select output for fitting
    # -------------------------
    # Split model has inconsistent "true" parameter value
    split = gr.Model("Split")
    split = split >> gr.cp_vec_function(
        fun=lambda df: gr.df_make(
            f=1 * df.c * df.x,
            g=2 * df.c * df.x,
        ),
        var=["c", "x"],
        out=["f", "g"],
    )
    split = split >> gr.cp_bounds(
        x=(-1, +1),
        c=(-1, +1),
    )

    # Both targets equal x, which no single value of c can reproduce
    targets = gr.df_make(x=gr.linspace(-1, +1, 100)) >> gr.tf_mutate(
        f=X.x, g=X.x
    )

    def fit_and_score(out):
        """Fit `split` on the outputs in `out`; return per-output MSE."""
        refit = targets >> gr.ft_nls(split, out=out)
        # Targets are renamed so they survive alongside the predictions
        predictions = refit >> gr.ev_df(
            targets >> gr.tf_rename(f_t=X.f, g_t=X.g)
        )
        return predictions >> gr.tf_summarize(
            mse_f=gr.mse(X.f, X.f_t),
            mse_g=gr.mse(X.g, X.g_t),
        )

    # Fitting both outputs: cannot achieve mse ~= 0
    score_both = fit_and_score(["f", "g"])
    self.assertTrue(score_both.mse_f[0] > 0)
    self.assertTrue(score_both.mse_g[0] > 0)

    # Fitting "f" only
    score_f = fit_and_score(["f"])
    self.assertTrue(score_f.mse_f[0] < 1e-16)
    self.assertTrue(score_f.mse_g[0] > 0)

    # Fitting "g" only
    score_g = fit_and_score(["g"])
    self.assertTrue(score_g.mse_f[0] > 0)
    self.assertTrue(score_g.mse_g[0] < 1e-16)