def test_proflogit_with_patsy_build_in_transformation_functions(self):
    """Test patsy built-in transformation functions (standardize)."""
    # demo_data returns a dict: categorical variables come back as
    # lists of strings, numerical data is drawn from a normal
    # distribution with a fixed seed.
    prng = np.random.RandomState(42)
    demo = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
    labels = prng.randint(2, size=len(demo["a"]))
    # dmatrix builds the design matrix alone (no left-hand side).
    # `demo` only needs dict-style indexing (a pandas.DataFrame works
    # too). Strings and booleans are treated as categorical variables,
    # with the first level as the baseline.
    design = patsy.dmatrix(
        "a + b + standardize(x1) + standardize(x2)",
        demo,
    )
    model = ProfLogitCCP(
        rga_kws={"niter": 10, "disp": False, "random_state": 42},
    )
    model.fit(design, labels)
    npt.assert_array_almost_equal(
        model.rga.res.x,
        [0.71321495, 0.0, -0.6815996, 0.0, 0.0, -0.92505635, 0.0],
    )
    self.assertAlmostEqual(model.rga.res.fun, 12.2837788495)
    self.assertAlmostEqual(model.score(design, labels), 12.4444444445)
def test_proflogit_with_patsy_demo_data_no_intercept(self):
    """Test on simple demo data from patsy without an intercept term."""
    # demo_data returns a dict: categorical variables as lists of
    # strings, numerical data from a fixed-seed normal distribution.
    prng = np.random.RandomState(42)
    demo = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
    labels = prng.randint(2, size=len(demo["a"]))
    # `- 1` in the formula drops the intercept column from the design.
    design = patsy.dmatrix("a + b + x1 + x2 - 1", demo)
    model = ProfLogitCCP(
        rga_kws={"niter": 10, "disp": False, "random_state": 42},
        intercept=False,
    )
    model.fit(design, labels)
    npt.assert_array_almost_equal(
        model.rga.res.x,
        [0.27466536, 0.0, -0.24030505, 0.0, 0.0, -0.82215168, 0.0],
    )
    self.assertAlmostEqual(model.rga.res.fun, 12.310732234783764)
    self.assertAlmostEqual(model.score(design, labels), 12.4444444445)
def test_proflogit_with_patsy_demo_data(self):
    """Test on simple categorical/numerical demo data from patsy."""
    # demo_data returns a dict: categorical variables as lists of
    # strings, numerical data from a fixed-seed normal distribution.
    prng = np.random.RandomState(42)
    demo = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
    labels = prng.randint(2, size=len(demo["a"]))
    # dmatrix builds the design matrix alone (no left-hand side);
    # `demo` only needs dict-style indexing (a DataFrame works too).
    design = patsy.dmatrix("a + b + x1 + x2", demo)
    model = ProfLogitCCP(
        rga_kws={"niter": 10, "disp": False, "random_state": 42},
    )
    model.fit(design, labels)
    expected_coefs = [
        0.26843982,   # Intercept
        0.0,          # Categorical variable 'a' - level a2
        -0.21947001,  # Categorical variable 'a' - level a3
        0.12036944,   # Categorical variable 'b' - level b2
        0.0,          # Categorical variable 'b' - level b3
        -0.47514314,  # Numeric variable 'x1'
        -0.08812723,  # Numeric variable 'x2'
    ]
    npt.assert_array_almost_equal(model.rga.res.x, expected_coefs)
    self.assertAlmostEqual(model.rga.res.fun, 12.3541334628)
    self.assertAlmostEqual(model.score(design, labels), 12.4444444445)
def test_proflogitccp_access_fit_method_directly(self):
    """Fit by chaining .fit() directly on the constructor result."""
    # NOTE(review): path is relative to the working directory of the
    # test runner — presumably the tests/ folder; verify when running.
    raw = np.loadtxt("../data/one-predictor.dat")
    labels = raw[:, 0]
    predictor = raw[:, 1]
    # Standardize the single predictor and prepend an intercept column.
    standardized = (predictor - np.mean(predictor)) / np.std(predictor)
    standardized = standardized.reshape(-1, 1)
    design = np.c_[np.ones_like(standardized), standardized]
    model = ProfLogitCCP(
        rga_kws={"niter": 10, "disp": False, "random_state": 42},
    ).fit(X=design, y=labels)
    npt.assert_array_almost_equal(model.rga.res.x, [-0.29866826, 0.00052175])
    self.assertAlmostEqual(model.rga.res.fun, 5.97213086792)
    self.assertAlmostEqual(
        model.score(X=design, y_true=labels), 5.9721830425745903
    )
def test_raise_assertion_error_rga_kws(self):
    """ Should raise AssertionError when rga_kws is not a dict. """
    with self.assertRaises(AssertionError):
        ProfLogitCCP(
            rga_kws=[],  # Incorrect: rga_kws must be a dict
            intercept=False,
        )
def test_raise_key_error_reg_kws(self):
    """Should raise KeyError when reg_kws contains an unknown key."""
    with self.assertRaises(KeyError):
        ProfLogitCCP(
            rga_kws={"niter": 1},
            reg_kws={"non-sense": True},  # Incorrect: unknown key
            intercept=False,
        )
def test_raise_type_error_rga_kws_wrong_param(self):
    """ Should raise TypeError for an unexpected keyword argument in rga_kws. """
    x = np.random.rand(5, 2)
    y = np.random.randint(2, size=x.shape[0])
    with self.assertRaises(TypeError):
        ProfLogitCCP(
            # Incorrect: `maxiter` is not an RGA keyword
            # (other tests use `niter` — see default rga_kws)
            rga_kws={
                "maxiter": 10
            },
            intercept=False,
        ).fit(x, y)
def test_simple_proflogit_str_label(self):
    """Use one predictor with string class labels ('yes'/'no')."""
    # NOTE(review): path is relative to the working directory of the
    # test runner — presumably the tests/ folder; verify when running.
    raw = np.loadtxt("../data/one-predictor.dat")
    # Map the numeric 0/1 target onto string labels.
    labels = ["yes" if value == 1 else "no" for value in raw[:, 0]]
    predictor = raw[:, 1]
    # Standardize the single predictor and prepend an intercept column.
    standardized = (predictor - np.mean(predictor)) / np.std(predictor)
    standardized = standardized.reshape(-1, 1)
    design = np.c_[np.ones_like(standardized), standardized]
    model = ProfLogitCCP(
        rga_kws={"niter": 10, "disp": False, "random_state": 42},
        empc_kws={"case_label": "yes"},  # tell EMPC which label is positive
    )
    model.fit(X=design, y=labels)
    npt.assert_array_almost_equal(model.rga.res.x, [-0.29866826, 0.00052175])
    self.assertAlmostEqual(model.rga.res.fun, 5.97213086792)
    self.assertAlmostEqual(
        model.score(X=design, y_true=labels), 5.9721830425745903
    )
def test_default_rga_kws(self):
    """Default rga_kws should be exactly {'niter': 1000, 'niter_diff': 250}."""
    expected = {"niter": 1000, "niter_diff": 250}
    self.assertDictEqual(ProfLogitCCP().rga_kws, expected)