def test_drop_out(self):
    """Checks that output column names are properly dropped"""
    # A model whose single output y0 should overwrite the input's y0 column
    md = gr.Model() >> gr.cp_function(lambda x: x[0] + 1, var=1, out=1)
    df_in = gr.df_make(x0=[0, 1, 2], y0=[0, 1, 2])
    # Expected: y0 replaced by the model's computed values
    df_true = gr.df_make(x0=[0, 1, 2], y0=[1, 2, 3])

    df_res = md >> gr.ev_df(df=df_in)
    self.assertTrue(gr.df_equal(df_res, df_true, close=True))
def test_explode(self):
    """Check tf_explode expands list-valued cells into rows.

    Covers the plain case (numeric lists) and the ``convert=True`` case,
    where string-valued lists should be exploded and converted back to
    numeric dtypes.
    """
    df_base = gr.df_make(x=[1, 2], y=[[3, 4], [5, 6]])
    # String-valued lists; convert=True should recover numeric values
    df_str = gr.df_make(x=[1, 2], y=[["3", "4"], ["5", "6"]])
    df_true = gr.df_make(x=[1, 1, 2, 2], y=[3, 4, 5, 6])

    df_res = df_base >> gr.tf_explode(X.y)
    # BUGFIX: the convert=True case previously exploded df_base, leaving
    # df_str unused; it must exercise the string-valued frame.
    df_res_s = df_str >> gr.tf_explode(X.y, convert=True)

    self.assertTrue(gr.df_equal(df_true, df_res, close=True))
    self.assertTrue(gr.df_equal(df_true, df_res_s, close=True))
def test_sir(self):
    """Verify the SIR model against its asymptotic closed-form solution."""
    from numpy import real
    from scipy.special import lambertw

    ## Verification test
    # Test parameters
    I0 = 1
    S0 = 99
    R0 = 0
    beta = 0.5
    gamma = 0.2
    # Asymptotic solution parameters
    N = I0 + S0 + R0
    R_0 = beta / gamma
    s_0 = S0 / N
    r_0 = R0 / N
    # Asymptotic solution via the Lambert W function
    S_inf = real(
        -(1 / R_0) * lambertw(-s_0 * R_0 * np.exp(-R_0 * (1 - r_0))) * N
    )

    ## Base tolerance
    md_sir = models.make_sir()
    df_inf = gr.eval_df(
        md_sir,
        gr.df_make(
            t=1e6,  # Approximation of t -> +\infty
            I0=I0,
            N=N,
            beta=beta,
            gamma=gamma,
        ))
    S_inf_comp = df_inf.S.values[-1]
    # Check relative tolerance: accurate to default rtol, but not better
    self.assertTrue(abs(S_inf - S_inf_comp) / S_inf < 1e-3)
    self.assertTrue(abs(S_inf - S_inf_comp) / S_inf > 1e-5)

    ## Refined tolerance
    md_sir = models.make_sir(rtol=1e-6)
    df_inf = gr.eval_df(
        md_sir,
        gr.df_make(
            t=1e6,  # Approximation of t -> +\infty
            I0=I0,
            N=N,
            beta=beta,
            gamma=gamma,
        ))
    S_inf_comp = df_inf.S.values[-1]
    # Check relative tolerance: tighter rtol yields a tighter result
    self.assertTrue(abs(S_inf - S_inf_comp) / S_inf < 1e-5)
def test_dropna(self):
    """Check tf_dropna, both default (any NaN) and subset-restricted."""
    df = gr.df_make(
        x=[1.0, 2.0, 3.0],
        y=[1.0, np.nan, 3.0],
        z=[1.0, 2.0, np.nan],
    )
    # Default drops every row containing any NaN
    df_true_default = gr.df_make(x=[1.0], y=[1.0], z=[1.0])
    # Restricting to y keeps the NaN in z
    df_true_y = gr.df_make(x=[1.0, 3.0], y=[1.0, 3.0], z=[1.0, np.nan])

    df_res_default = df >> gr.tf_dropna()
    df_res_y = df >> gr.tf_dropna(subset=["y"])

    self.assertTrue(gr.df_equal(df_true_default, df_res_default))
    self.assertTrue(gr.df_equal(df_true_y, df_res_y))
def test_df_make(self):
    """Check df_make broadcasting, length mismatch, and Intention errors."""
    # Check correctness: scalars and length-1 lists broadcast to length 2
    df_true = pd.DataFrame(dict(x=[0, 1], y=[0, 0], z=[1, 1]))
    df_res = gr.df_make(x=[0, 1], y=[0], z=1)
    self.assertTrue(gr.df_equal(df_true, df_res))

    # Check for mismatch: incompatible lengths must raise
    with self.assertRaises(ValueError):
        gr.df_make(x=[1, 2, 3], y=[1, 2])

    # Catch an intention operator passed where data is expected
    with self.assertRaises(ValueError):
        gr.df_make(y=DF.x)
def test_corr(self):
    """Check gr.corr on perfectly correlated / anti-correlated columns."""
    df_data = gr.df_make(x=[1., 2., 3., 4.])
    df_data["y"] = 0.5 * df_data.x   # perfectly correlated with x
    df_data["z"] = -0.5 * df_data.x  # perfectly anti-correlated with x

    self.assertTrue(abs(gr.corr(df_data.x, df_data.y) - 1.0) < 1e-6)
    self.assertTrue(abs(gr.corr(df_data.x, df_data.z) + 1.0) < 1e-6)
def test_pivot_wider_representation_index(self):
    """
    Test if pivot_wider can handle duplicate entries if a representation
    index is present
    """
    stang = data.df_stang_wide
    # Round-trip: lengthen with an explicit representation index...
    long = gr.tran_pivot_longer(
        stang,
        index_to="idx",
        columns=["E_00", "mu_00", "E_45", "mu_45", "E_90", "mu_90"],
        names_to="var",
        values_to="val",
    )
    # ...then widen back using that index to disambiguate duplicates
    wide = gr.tran_pivot_wider(
        long,
        names_from="var",
        values_from="val",
    )
    expected = gr.df_make(
        idx=[0, 1, 2, 3, 4, 5, 6, 7, 8],
        thick=[0.022, 0.022, 0.032, 0.032, 0.064, 0.064, 0.081, 0.081, 0.081],
        alloy=[" al_24st", " al_24st", " al_24st", " al_24st", " al_24st",
               " al_24st", " al_24st", " al_24st", " al_24st"],
        E_00=[10600.000, 10600.000, 10400.000, 10300.000, 10500.000, 10700.000,
              10000.000, 10100.000, 10000.000],
        E_45=[10700.000, 10500.000, 10400.000, 10500.000, 10400.000, 10500.00,
              10000.000, 9900.000, -1.0],
        E_90=[10500.000, 10700.000, 10300.000, 10400.000, 10400.000, 10500.000,
              9900.000, 10000.000, 9900.000],
        mu_00=[0.321, 0.323, 0.329, 0.319, 0.323, 0.328, 0.315, 0.312, 0.311],
        mu_45=[0.329, 0.331, 0.318, 0.326, 0.331, 0.328, 0.320, 0.312, -1.000],
        mu_90=[0.310, 0.323, 0.322, 0.330, 0.327, 0.320, 0.314, 0.316, 0.314],
    )
    assert_frame_equal(wide, expected)
def test_pivot_longer_dot_value_and_names_sep(self):
    """
    Test pivot_longer when it receives the .value and names_sep
    """
    DF = gr.Intention()
    wide = gr.df_make(x=range(0, 6))
    # Build three y_* columns whose prefix should become the value column
    wide = gr.tran_mutate(
        wide,
        y_Trend=DF.x**2,
        y_Variability=random.normal(size=6),
        y_Mixed=DF.x**2 + random.normal(size=6),
    )
    long = gr.tran_pivot_longer(
        wide,
        columns=["y_Trend", "y_Variability", "y_Mixed"],
        names_to=(".value", "type"),
        names_sep="_",
    )
    # The result must contain exactly the x, type, and y columns
    check = ["x", "type", "y"]
    col_check = [x for x in long.columns.values if x in check]
    result = False
    if set(col_check) == set(check):
        result = True
    self.assertTrue(result)
def test_transforms(self):
    """Check z2x/x2z round trips and the Jacobian of a Gaussian copula model."""
    ## Setup
    df_corr = pd.DataFrame(dict(var1=["x"], var2=["y"], corr=[0.5]))
    Sigma_h = np.linalg.cholesky(np.array([[1.0, 0.5], [0.5, 1.0]]))
    md = (
        gr.Model()
        >> gr.cp_marginals(x=dict(dist="norm", loc=0, scale=1),
                           y=dict(dist="norm", loc=0, scale=1))
        >> gr.cp_copula_gaussian(df_corr=df_corr))

    ## Copula and marginals have same var_rand order
    self.assertTrue(
        list(md.density.marginals) == md.density.copula.var_rand)

    ## Transforms invariant: z -> x -> z is the identity
    z = np.array([0, 0])
    x = md.z2x(z)
    zp = md.x2z(x)
    self.assertTrue(np.all(z == zp))

    df_z = gr.df_make(x=0.0, y=0.0)
    df_x = md.norm2rand(df_z)
    df_zp = md.rand2norm(df_x)
    self.assertTrue(gr.df_equal(df_z, df_zp))

    ## Jacobian accurate: compare against forward finite differences
    # (h is a module-level step size)
    dxdz_fd = np.zeros((2, 2))
    dxdz_fd[0, :] = (md.z2x(z + np.array([h, 0])) - md.z2x(z)) / h
    dxdz_fd[1, :] = (md.z2x(z + np.array([0, h])) - md.z2x(z)) / h
    dxdz_p = md.dxdz(z)

    self.assertTrue(np.allclose(dxdz_fd, dxdz_p))
def test_sample(self):
    """Check eval_sample accuracy, n-rounding, skip mode, and index option."""
    ## Accurate: seeded draws match a direct numpy reference
    n = 2
    df_res = gr.eval_sample(self.md, n=n, df_det="nom", seed=101)
    np.random.seed(101)
    df_truth = pd.DataFrame({"x0": np.random.random(n)})
    df_truth["y0"] = df_truth["x0"]
    self.assertTrue(gr.df_equal(df_res, df_truth))

    ## Rounding: non-integer n is rounded down
    df_round = gr.eval_sample(self.md, n=n + 0.1, df_det="nom", seed=101)
    self.assertTrue(gr.df_equal(df_round, df_truth))

    ## Pass-through: skip=True still produces the input draws
    df_pass = gr.eval_sample(self.md, n=n, skip=True, df_det="nom", seed=101)
    self.assertTrue(gr.df_equal(df_pass[["x0"]], df_truth[["x0"]]))

    ## Optional observation index
    df_idx = gr.eval_sample(
        self.md_mixed,
        n=n,
        df_det=gr.df_make(x0=[-1, 0, 1]),
        seed=101,
        index="idx",
    )
    # One distinct index value per sampled observation
    self.assertTrue(len(set(df_idx.idx)) == n)
def test_pivot_longer(self):
    """
    Test basic functionality of pivot_longer
    """
    wide = gr.df_make(One=[1, 2, 3], Two=[4, 5, 6])
    long = gr.tran_pivot_longer(
        wide,
        columns=("One", "Two"),
        names_to="columns",
        values_to="values",
    )
    # Both source columns stack into (name, value) pairs
    expected = gr.df_make(
        columns=["One", "One", "One", "Two", "Two", "Two"],
        values=[1, 2, 3, 4, 5, 6],
    )
    assert_frame_equal(long, expected)
def test_spread(self):
    """Check tf_spread against a hand-built pandas pivot, plus convert/fill."""
    columns = self.df_elongated.columns.tolist()
    id_cols = ["_ID"]

    # Build the reference result by pivoting manually on _ID
    df = self.df_elongated.copy()
    df["temp_index"] = df["_ID"].values
    df = df.set_index("temp_index")
    spread_data = df[["variable", "value"]]
    spread_data = spread_data.pivot(columns="variable", values="value")

    # Reference for convert=True: type-convert the new columns
    converted_spread = spread_data.copy()
    columns_to_convert = [col for col in spread_data if col not in columns]
    converted_spread = gr.convert_type(converted_spread, columns_to_convert)

    df = df[["_ID"]].drop_duplicates()
    df_spread = df.merge(
        spread_data, left_index=True, right_index=True
    ).reset_index(drop=True)
    df_conv = df.merge(
        converted_spread, left_index=True, right_index=True
    ).reset_index(drop=True)

    # Results under test
    d_spread = self.df_elongated >> gr.tf_spread("variable", "value")
    d_spread_conv = self.df_elongated >> gr.tf_spread(
        "variable", "value", convert=True
    )

    self.assertTrue(df_spread.equals(d_spread))
    self.assertTrue(df_conv.equals(d_spread_conv))

    ## Test fill: missing (idx, y) combinations take the fill value
    df_base = gr.df_make(
        x=[1, 2, 3, 4, 5],
        y=["a", "b", "c", "a", "b"],
        idx=[0, 0, 0, 1, 1],
    )
    df_true = gr.df_make(
        a=[1, 4],
        b=[2, 5],
        c=[3, 0],
        idx=[0, 1],
    )
    df_res = df_base >> gr.tf_spread(X.y, X.x, fill=0)
    self.assertTrue(gr.df_equal(df_true, df_res, close=True))
def test_iocorr(self):
    """Check tf_iocorr computes input/output correlations in tidy form."""
    df = (
        gr.df_make(x=[1., 2., 3., 4.])
        >> gr.tf_mutate(
            y=+0.5 * DF.x,  # rho(x, y) == +1
            z=-0.5 * DF.x,  # rho(x, z) == -1
        )
        >> gr.tf_iocorr(var=["x"], out=["y", "z"])
    )
    df_true = gr.df_make(
        var=["x", "x"],
        out=["y", "z"],
        rho=[1.0, -1.0],
    )
    ## Check for correct values
    self.assertTrue(gr.df_equal(df, df_true))
def test_fit_polyridge(self):
    """Test the functionality and correctness of ft_polyridge()
    """
    # Full-factorial grid with a response depending on x - y only
    df_test = (gr.df_make(x=range(10))
               >> gr.tf_outer(gr.df_make(y=range(10)))
               >> gr.tf_outer(gr.df_make(z=range(10)))
               >> gr.tf_mutate(f=DF.x - DF.y))
    md = gr.fit_polyridge(df_test, out="f", n_degree=1, n_dim=1)

    # The fitted model should predict f = x - y exactly
    df1 = gr.eval_df(md, df=gr.df_make(x=[2, 1], y=[1], z=[0]))
    df2 = gr.df_make(x=[2, 1], y=[1], z=[0], f_mean=[1, 0])
    self.assertTrue(gr.df_equal(
        df1,
        df2,
        close=True,
    ))
def test_nls(self):
    """Check ev_nls/ft_nls on models with features and a constant model."""
    ## Setup: model with one fitted bound (x0), one random var, one fixed
    md_feat = (
        gr.Model()
        >> gr.cp_function(fun=lambda x: x[0] * x[1] + x[2], var=3, out=1,)
        >> gr.cp_bounds(x0=[-1, +1], x2=[0, 0])
        >> gr.cp_marginals(x1=dict(dist="norm", loc=0, scale=1))
    )
    md_const = (
        gr.Model()
        >> gr.cp_function(fun=lambda x: x[0], var=1, out=1)
        >> gr.cp_bounds(x0=(-1, +1))
    )
    # Generate synthetic data at the true x0 = 0.1
    df_response = md_feat >> gr.ev_df(
        df=gr.df_make(x0=0.1, x1=[-1, -0.5, +0, +0.5, +1], x2=0)
    )
    df_data = df_response[["x1", "y0"]]

    ## Model with features: recovered x0 matches the truth
    df_true = gr.df_make(x0=0.1)
    df_fit = md_feat >> gr.ev_nls(df_data=df_data, append=False)
    pd.testing.assert_frame_equal(
        df_fit,
        df_true,
        check_exact=False,
        check_dtype=False,
        check_column_type=False,
    )

    ## Fitting synonym: ft_nls returns a model over the remaining vars
    md_feat_fit = df_data >> gr.ft_nls(md=md_feat, verbose=False)
    self.assertTrue(set(md_feat_fit.var) == set(["x1", "x2"]))

    ## Constant model: best fit of symmetric data is zero
    df_const = gr.df_make(x0=0)
    df_fit = md_const >> gr.ev_nls(df_data=gr.df_make(y0=[-1, 0, +1]))
    pd.testing.assert_frame_equal(
        df_fit,
        df_const,
        check_exact=False,
        check_dtype=False,
        check_column_type=False,
    )
def test_nominal(self):
    """Checks the implementation of nominal values

    Nominal value is the bound midpoint for finite bounds, and the finite
    endpoint for semi-infinite bounds.
    """
    # FIX: np.Inf was removed in NumPy 2.0; np.inf is the canonical alias
    md = gr.Model() >> gr.cp_bounds(
        x0=[-1, +1],
        x1=[0.1, np.inf],
        x2=[-np.inf, -0.1],
    )
    df_true = gr.df_make(x0=0.0, x1=+0.1, x2=-0.1)
    df_res = gr.eval_nominal(md, df_det="nom", skip=True)

    self.assertTrue(gr.df_equal(df_res, df_true))
def test_tran_md(self):
    """Check tran_md matches eval_df on the same model and data."""
    md = models.make_test()

    ## Check for identical responses via both entry points
    df = gr.df_make(x0=1, x1=1, x2=1)
    df_ev = gr.eval_df(md, df=df)
    df_tf = gr.tran_md(df, md=md)

    self.assertTrue(gr.df_equal(df_ev, df_tf))
def test_tran_poly(self):
    """Check tran_poly builds degree-2 polynomial features over (x, y)."""
    df = gr.df_make(x=[0.0, 1.0, 0.0], y=[0.0, 0.0, 1.0], z=[1.0, 2.0, 3.0],)

    # Expected features: constant, squares, and the cross term
    df_true = df.copy()
    df_true["1"] = [1.0, 1.0, 1.0]
    df_true["x^2"] = [0.0, 1.0, 0.0]
    df_true["x y"] = [0.0, 0.0, 0.0]
    df_true["y^2"] = [0.0, 0.0, 1.0]

    df_res = gr.tran_poly(df, var=["x", "y"], degree=2, keep=True)
    self.assertTrue(gr.df_equal(df_true, df_res[df_true.columns]))
def setUp(self):
    """Construct fixture models for reliability (MPP / FORM) tests."""
    ## Linear limit state w/ MPP off initial guess
    self.beta_true = 3
    self.md = (
        gr.Model()
        >> gr.cp_function(
            fun=lambda x: self.beta_true * 2 - x[0] - np.sqrt(3) * x[1],
            var=2,
            out=["g"],
        )
        >> gr.cp_marginals(
            x0=dict(dist="norm", loc=0, scale=1, sign=1),
            x1=dict(dist="norm", loc=0, scale=1, sign=1),
        )
        >> gr.cp_copula_independence()
    )

    ## Linear limit state w/ lognormal marginals
    self.md_log = (
        gr.Model()
        >> gr.cp_vec_function(
            fun=lambda df: gr.df_make(
                g=gr.exp(gr.sqrt(2) * 1) - df.x * df.y
            ),
            var=["x", "y"],
            out=["g"],
        )
        >> gr.cp_marginals(
            x=dict(dist="lognorm", loc=0, scale=1, s=1),
            y=dict(dist="lognorm", loc=0, scale=1, s=1),
        )
        >> gr.cp_copula_independence()
    )
    # Known most-probable-point for the lognormal limit state
    self.df_mpp = gr.df_make(
        x=gr.exp(gr.sqrt(2) / 2),
        y=gr.exp(gr.sqrt(2) / 2),
        beta_g=1.0,
        g=0.0,
    )

    ## Cantilever beam for flatten test
    self.md_beam = models.make_cantilever_beam()
def test_summarize(self):
    """Check tf_count over one and two grouping columns."""
    df = gr.df_make(
        x=["A", "A", "B", "B"],
        y=["A", "B", "A", "B"],
    )
    # Counting by x alone collapses to two groups of two
    df_true1 = gr.df_make(
        x=["A", "B"],
        n=[2, 2],
    )
    # Counting by (x, y) leaves each row its own group
    df_true2 = gr.df_make(
        x=["A", "A", "B", "B"],
        y=["A", "B", "A", "B"],
        n=[1, 1, 1, 1],
    )

    df_res1 = (df >> gr.tf_count(DF.x))
    self.assertTrue(df_true1.equals(df_res1))

    df_res2 = (df >> gr.tf_count(DF.x, DF.y))
    self.assertTrue(df_true2.equals(df_res2))
def test_tran_polyridge(self):
    """Test the functionality and correctness of tran_polyridge()
    """
    ## Setup: full-factorial grid; response depends on x - y only
    df_test = (gr.df_make(x=range(10))
               >> gr.tf_outer(gr.df_make(y=range(10)))
               >> gr.tf_outer(gr.df_make(z=range(10)))
               >> gr.tf_mutate(f=DF.x - DF.y))

    ## Assertions
    # No `out` column
    with self.assertRaises(ValueError):
        gr.tran_polyridge(df_test)
    # Unrecognized `out` column
    with self.assertRaises(ValueError):
        gr.tran_polyridge(df_test, out="foo")
    # Unrecognized `var` column(s)
    with self.assertRaises(ValueError):
        gr.tran_polyridge(df_test, var=["foo", "bar"])
    # Invalid degree (n_dim may not exceed n_degree)
    with self.assertRaises(ValueError):
        gr.tran_polyridge(df_test, out="f", n_degree=1, n_dim=2)

    ## Correctness: recovered ridge direction is (1, -1, 0) / sqrt(2)
    df_res = (df_test >> gr.tf_polyridge(
        out="f",
        n_dim=1,
        n_degree=1,
    ))
    df_true = gr.df_make(x=1 / gr.sqrt(2), y=-1 / gr.sqrt(2), z=0)
    self.assertTrue(gr.df_equal(df_res, df_true, close=True))

    ## Higher-dimensional functionality (smoke test only)
    df_higher = (gr.df_grid(
        x=range(10),
        y=range(10),
        z=range(10),
    ) >> gr.tf_mutate(f=DF.x + DF.y + DF.z))
    gr.tran_polyridge(df_higher, out="f", n_degree=2, n_dim=2)
def test_pivot_wider_NaN_entries(self):
    """
    Test if pivot_wider returns a table with NaN values for unspecified
    entries that have no represenational index
    """
    original = gr.df_make(A=[1, 2, 3], B=[4, 5, 6])
    long = gr.tran_pivot_longer(
        original,
        columns=("A", "B"),
        names_to="var",
        values_to="value",
    )
    wide = gr.tran_pivot_wider(
        long,
        names_from="var",
        values_from="value",
    )
    # Without an index, rows don't recombine: unmatched cells become NaN
    expected = gr.df_make(
        A=[1, 2, 3, NaN, NaN, NaN],
        B=[NaN, NaN, NaN, 4, 5, 6],
    )
    assert_frame_equal(wide, expected)
def test_comp_model(self):
    """Test model composition"""
    md_inner = (
        gr.Model("inner")
        >> gr.cp_function(fun=lambda x: x[0] + x[1], var=2, out=1)
        >> gr.cp_marginals(x0=dict(dist="norm", loc=0, scale=1))
        >> gr.cp_copula_independence()
    )

    ## Deterministic composition: inner vars/outputs pass through
    md_det = gr.Model("outer_det") >> gr.cp_md_det(md=md_inner)
    self.assertTrue(set(md_det.var) == {"x0", "x1"})
    self.assertTrue(md_det.out == ["y0"])
    gr.eval_df(md_det, df=gr.df_make(x0=0, x1=0))

    ## Sample composition: x0 is replaced by its distribution parameters
    md_sample = gr.Model("outer_det") >> gr.cp_md_sample(
        md=md_inner, param=dict(x0=("loc", "scale"))
    )
    self.assertTrue(set(md_sample.var) == {"x0_loc", "x0_scale", "x1"})
    self.assertTrue(set(md_sample.out) == {"y0"})
    gr.eval_df(md_sample, df=gr.df_make(x0_loc=0, x0_scale=1, x1=0))
def test_stratum_min(self):
    """Check stratum_min accuracy and its length-mismatch error."""
    # Known stratum numbers p for each (x, y) point
    df_test = gr.df_make(
        x=[1, 2, 0, 1, 2, 0, 1, 2],
        y=[0, 0, 1, 1, 1, 2, 2, 2],
        p=[1, 2, 1, 2, 3, 2, 3, 4],
    )

    # Test for accuracy: computed strata agree with p on every row
    self.assertTrue(
        (df_test
         >> gr.tf_mutate(p_comp=gr.stratum_min(X.x, X.y))
         >> gr.tf_mutate(flag=X.p == X.p_comp)).flag.all())

    # Check for ValueError on mismatched input lengths
    with self.assertRaises(ValueError):
        gr.stratum_min([1], [1, 2, 3])
def test_corr(self):
    """Check gr.corr values and its NaN-handling behavior."""
    df_data = gr.df_make(x=[1., 2., 3., 4.])
    df_data["y"] = 0.5 * df_data.x   # perfectly correlated
    df_data["z"] = -0.5 * df_data.x  # perfectly anti-correlated

    self.assertTrue(abs(gr.corr(df_data.x, df_data.y) - 1.0) < 1e-6)
    self.assertTrue(abs(gr.corr(df_data.x, df_data.z) + 1.0) < 1e-6)

    ## Test NaN handling: inject NaN into both columns
    df_nan = (df_data >> gr.tf_mutate(
        x=gr.if_else(X.x == 1, gr.NaN, X.x),
        y=gr.if_else(X.x == 4, gr.NaN, X.y),
    ))
    # Default: NaN values raise
    with self.assertRaises(ValueError):
        gr.corr(df_nan.x, df_nan.y)
    # nan_drop=True: correlation computed on complete rows only
    self.assertTrue(
        abs(gr.corr(df_nan.x, df_nan.y, nan_drop=True) - 1.0) < 1e-6)
def test_pivot_longer_no_representation_index(self):
    """
    Test if pivot_longer does not produce a representation index for
    nx2 DataFrame that has no index_to call
    """
    wide = gr.df_make(A=[1, 2, 3], B=[4, 5, 6])
    long = gr.tran_pivot_longer(
        wide,
        columns=("A", "B"),
        names_to="var",
        values_to="value",
    )
    # Only the (var, value) pair should appear — no extra index column
    expected = DataFrame(
        {
            "var": ["A", "A", "A", "B", "B", "B"],
            "value": [1, 2, 3, 4, 5, 6],
        }
    )
    assert_frame_equal(long, expected)
def test_pivot_longer_names_sep_position_thrice(self):
    """
    Test if pivot_longer works with names_sep argument being a position
    """
    wide = gr.df_make(A_1_hello=[1, 2, 3], B_2_bye=[4, 5, 6])
    # Split names at character positions 1 and 3 into three parts
    long = gr.tran_pivot_longer(
        wide,
        names_sep=[1, 3],
        columns=["A_1_hello", "B_2_bye"],
        names_to=("letter", "num", "saying"),
        values_to="val",
    )
    names_to = ["letter", "num", "saying"]
    names_to_check = [x for x in long.columns.values if x in names_to]
    result = False
    if names_to == names_to_check:
        result = True
    self.assertTrue(result)
def test_pivot_longer_names_sep_thrice(self):
    """
    Test if pivot_longer properly works with names_sep having to split
    columns into 3 or more
    """
    wide = gr.df_make(A_1_hello=[1, 2, 3], B_2_bye=[4, 5, 6])
    # Split each column name on "_" into three name components
    long = gr.tran_pivot_longer(
        wide,
        names_sep="_",
        columns=["A_1_hello", "B_2_bye"],
        names_to=("letter", "num", "saying"),
        values_to="val",
    )
    names_to = ["letter", "num", "saying"]
    names_to_check = [x for x in long.columns.values if x in names_to]
    result = False
    if names_to == names_to_check:
        result = True
    self.assertTrue(result)
def test_pivot_longer_names_sep_multiple_seps(self):
    """
    Test if pivot_longer properly works with names_sep having column
    names with varying amount of seps
    """
    # B_2 has fewer separator-delimited parts than A_1_hello
    wide = gr.df_make(A_1_hello=[1, 2, 3], B_2=[4, 5, 6])
    long = gr.tran_pivot_longer(
        wide,
        names_sep="_",
        columns=["A_1_hello", "B_2"],
        names_to=("letter", "num", "saying"),
        values_to="val",
    )
    names_to = ["letter", "num", "saying"]
    names_to_check = [x for x in long.columns.values if x in names_to]
    result = False
    if names_to == names_to_check:
        result = True
    self.assertTrue(result)
def test_nls(self):
    """Check ft_nls fitting, UQ, warnings, fixed params, and trajectories."""
    ## Ground-truth model
    c_true = 2
    a_true = 1

    md_true = (gr.Model()
               >> gr.cp_function(
                   fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
                   var=["x", "epsilon"],
                   out=["y"],
               )
               >> gr.cp_marginals(epsilon={
                   "dist": "norm",
                   "loc": 0,
                   "scale": 0.5,
               })
               >> gr.cp_copula_independence())
    # Noisy observations at five x locations
    df_data = md_true >> gr.ev_monte_carlo(
        n=5, seed=101, df_det=gr.df_make(x=[0, 1, 2, 3, 4]))

    ## Model to fit
    md_param = (gr.Model()
                >> gr.cp_function(
                    fun=lambda x: x[2] * np.exp(x[0] * x[1]),
                    var=["x", "c", "a"],
                    out=["y"])
                >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0]))

    ## Fit the model
    md_fit = df_data >> gr.ft_nls(
        md=md_param,
        verbose=False,
        uq_method="linpool",
    )

    ## Unidentifiable model throws warning
    # -------------------------
    # a/z enter only as a ratio, so they cannot be identified separately
    md_unidet = (gr.Model()
                 >> gr.cp_function(
                     fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
                     var=["x", "c", "a", "z"],
                     out=["y"],
                 )
                 >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1]))
    with self.assertWarns(RuntimeWarning):
        gr.fit_nls(
            df_data,
            md=md_unidet,
            uq_method="linpool",
        )

    ## True parameters in wide confidence region
    # -------------------------
    alpha = 1e-3
    self.assertTrue(
        (md_fit.density.marginals["c"].q(alpha / 2) <= c_true)
        and (c_true <= md_fit.density.marginals["c"].q(1 - alpha / 2)))
    self.assertTrue(
        (md_fit.density.marginals["a"].q(alpha / 2) <= a_true)
        and (a_true <= md_fit.density.marginals["a"].q(1 - alpha / 2)))

    ## Model with fixed parameter
    # -------------------------
    md_fixed = (gr.Model()
                >> gr.cp_function(
                    fun=lambda x: x[2] * np.exp(x[0] * x[1]),
                    var=["x", "c", "a"],
                    out=["y"])
                >> gr.cp_bounds(c=[0, 4], a=[1, 1]))
    md_fit_fixed = df_data >> gr.ft_nls(
        md=md_fixed, verbose=False, uq_method="linpool")

    # Test that fixed model can evaluate successfully
    gr.eval_monte_carlo(md_fit_fixed, n=1, df_det="nom")

    ## Trajectory model
    # -------------------------
    md_base = models.make_trajectory_linear()
    md_fit = data.df_trajectory_windowed >> gr.ft_nls(
        md=md_base, method="SLSQP", tol=1e-3)
    df_tmp = md_fit >> gr.ev_nominal(df_det="nom")