Example #1
 def test_proflogit_with_patsy_build_in_transformation_functions(self):
     """Test patsy build-in transformation functions."""
     # demo_data: returns a dict
     # Categorical variables are returned as a list of strings.
     # Numerical data sampled from a normal distribution (fixed seed)
     rng = np.random.RandomState(42)
     data = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
     y = rng.randint(2, size=len(data["a"]))
     # dmatrix: to create the design matrix alone (no left-hand side)
     # Important that `data` can be indexed like a Python dictionary,
     # e.g., `data[varname]`. It can also be a pandas.DataFrame
     # Strings and booleans are treated as categorical variables, where
     # the first level is the baseline.
     X = patsy.dmatrix(
         "a + b + standardize(x1) + standardize(x2)",
         data,
     )
     pfl = ProfLogitCCP(
         rga_kws={"niter": 10, "disp": False, "random_state": 42},
     )
     pfl.fit(X, y)
     npt.assert_array_almost_equal(
         pfl.rga.res.x,
         [0.71321495, 0.0, -0.6815996, 0.0, 0.0, -0.92505635, 0.0],
     )
     self.assertAlmostEqual(pfl.rga.res.fun, 12.2837788495)
     empc_score = pfl.score(X, y)
     self.assertAlmostEqual(empc_score, 12.4444444445)
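The methods in these examples are excerpted from a larger unittest test case, so the module-level preamble is not shown. A minimal sketch of what they assume follows; the import path for ProfLogitCCP and the test-class name are guesses, not the package's actual layout.

 # Sketch of the assumed test-module preamble (hypothetical import path and class name).
 import unittest

 import numpy as np
 import numpy.testing as npt
 import patsy

 from proflogit import ProfLogitCCP  # assumption: adjust to the real module path


 class ProfLogitCCPTests(unittest.TestCase):
     # ... the test methods shown in these examples go here ...
     pass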
Example #2
 def test_proflogit_with_patsy_demo_data_no_intercept(self):
     """
     Test on simple demo data from patsy without an intercept.
     """
     # demo_data: returns a dict
     # categorical variables are returned as a list of strings.
     # Numerical data sampled from a normal distribution (fixed seed)
     rng = np.random.RandomState(42)
     data = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
     y = rng.randint(2, size=len(data["a"]))
     # dmatrix: to create the design matrix alone (no left-hand side)
     X = patsy.dmatrix("a + b + x1 + x2 - 1", data)
     pfl = ProfLogitCCP(
         rga_kws={"niter": 10, "disp": False, "random_state": 42},
         intercept=False,
     )
     pfl.fit(X, y)
     npt.assert_array_almost_equal(
         pfl.rga.res.x,
         [0.27466536, 0.0, -0.24030505, 0.0, 0.0, -0.82215168, 0.0],
     )
     self.assertAlmostEqual(pfl.rga.res.fun, 12.310732234783764)
     empc_score = pfl.score(X, y)
     self.assertAlmostEqual(empc_score, 12.4444444445)
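In the formula above, "- 1" tells patsy to drop the intercept column ("+ 0" has the same effect). To keep the design matrix full-rank, patsy then typically codes the first categorical term with indicators for all of its levels, which is why Example #2 still fits seven coefficients. A quick check of the generated columns, continuing from the snippet above:

     # Inspect the columns patsy built for the no-intercept formula;
     # `design_info.column_names` is part of patsy's DesignMatrix API.
     print(X.design_info.column_names)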
Example #3
 def test_proflogit_with_patsy_demo_data(self):
     """Test on simple categorical/numerical demo data from patsy."""
     # demo_data: returns a dict
     # Categorical variables are returned as a list of strings.
     # Numerical data sampled from a normal distribution (fixed seed)
     rng = np.random.RandomState(42)
     data = patsy.demo_data("a", "b", "x1", "x2", nlevels=3)
     y = rng.randint(2, size=len(data["a"]))
     # dmatrix: to create the design matrix alone (no left-hand side)
     # Important that `data` can be indexed like a Python dictionary,
     # e.g., `data[varname]`. It can also be a pandas.DataFrame
     X = patsy.dmatrix("a + b + x1 + x2", data)
     pfl = ProfLogitCCP(
         rga_kws={"niter": 10, "disp": False, "random_state": 42},
     )
     pfl.fit(X, y)
     npt.assert_array_almost_equal(
         pfl.rga.res.x,
         [
             0.26843982,  # Intercept
             0.0,  # Categorical variable 'a' - level a2
             -0.21947001,  # Categorical variable 'a' - level a3
             0.12036944,  # Categorical variable 'b' - level b2
             0.0,  # Categorical variable 'b' - level b3
             -0.47514314,  # Numeric variable 'x1'
             -0.08812723,  # Numeric variable 'x2'
         ],
     )
     self.assertAlmostEqual(pfl.rga.res.fun, 12.3541334628)
     empc_score = pfl.score(X, y)
     self.assertAlmostEqual(empc_score, 12.4444444445)
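The inline comments above spell out which design-matrix column each coefficient belongs to. The same mapping can be recovered programmatically; a small sketch, assuming pfl has been fitted exactly as in Example #3:

     # Pair each fitted coefficient with its patsy column name.
     for name, coef in zip(X.design_info.column_names, pfl.rga.res.x):
         print(f"{name:>12s}: {coef: .8f}")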
Example #4
 def test_proflogitccp_access_fit_method_directly(self):
     """Chain the fit call directly on the constructor, using one predictor."""
     dat = np.loadtxt("../data/one-predictor.dat")
     y = dat[:, 0]
     x = dat[:, 1]
     m = np.mean(x)
     s = np.std(x)
     z = (x - m) / s
     z = z.reshape(len(x), 1)
     dmat = np.c_[np.ones_like(z), z]
     pfl = ProfLogitCCP(
         rga_kws={"niter": 10, "disp": False, "random_state": 42},
     ).fit(X=dmat, y=y)
     npt.assert_array_almost_equal(pfl.rga.res.x, [-0.29866826, 0.00052175])
     self.assertAlmostEqual(pfl.rga.res.fun, 5.97213086792)
     empc_score = pfl.score(X=dmat, y_true=y)
     self.assertAlmostEqual(empc_score, 5.9721830425745903)
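Example #4 (and Example #8 below) load ../data/one-predictor.dat, which is not part of this listing; column 0 holds the 0/1 label and column 1 the single predictor. If the file is unavailable, a stand-in with the same layout can be generated as below, but note that the coefficients and scores asserted above hold only for the original file.

 # Hypothetical stand-in for ../data/one-predictor.dat: label in column 0,
 # predictor in column 1. Only the layout matches the original data.
 rng = np.random.RandomState(0)
 x = rng.normal(size=200)
 y = rng.randint(2, size=x.shape[0])
 np.savetxt("one-predictor.dat", np.c_[y, x])  # write wherever the tests expect it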
Example #5
 def test_raise_assertion_error_rga_kws(self):
     """
     Should raise AssertionError when rga_kws is not a dict.
     """
     with self.assertRaises(AssertionError):
         ProfLogitCCP(
             rga_kws=[],  # Incorrect: not a dict
             intercept=False,
         )
Example #6
 def test_raise_key_error_reg_kws(self):
     """
     Should raise KeyError when reg_kws keys are incorrect.
     """
     with self.assertRaises(KeyError):
         ProfLogitCCP(
             rga_kws={"niter": 1},
             reg_kws={"non-sense": True},  # Incorrect
             intercept=False,
         )
Example #7
 def test_raise_type_error_rga_kws_wrong_param(self):
     """
     Should raise TypeError for an unexpected keyword argument in rga_kws.
     """
     x = np.random.rand(5, 2)
     y = np.random.randint(2, size=x.shape[0])
     with self.assertRaises(TypeError):
         ProfLogitCCP(
             rga_kws={"maxiter": 10},  # Incorrect: unexpected keyword
             intercept=False,
         ).fit(x, y)
Example #8
 def test_simple_proflogit_str_label(self):
     """Use one predictor."""
     dat = np.loadtxt("../data/one-predictor.dat")
     y = ["yes" if v == 1 else "no" for v in dat[:, 0]]
     x = dat[:, 1]
     m = np.mean(x)
     s = np.std(x)
     z = (x - m) / s
     z = z.reshape(len(x), 1)
     dmat = np.c_[np.ones_like(z), z]
     pfl = ProfLogitCCP(
         rga_kws={"niter": 10, "disp": False, "random_state": 42},
         empc_kws={"case_label": "yes"},
     )
     pfl.fit(X=dmat, y=y)
     npt.assert_array_almost_equal(pfl.rga.res.x, [-0.29866826, 0.00052175])
     self.assertAlmostEqual(pfl.rga.res.fun, 5.97213086792)
     empc_score = pfl.score(X=dmat, y_true=y)
     self.assertAlmostEqual(empc_score, 5.9721830425745903)
Example #9
 def test_default_rga_kws(self):
     """rga_kws should default to niter=1000 and niter_diff=250."""
     pfl = ProfLogitCCP()
     target_value = {"niter": 1000, "niter_diff": 250}
     self.assertDictEqual(pfl.rga_kws, target_value)