class PerfectCorrelationMultivariateOLSTests(unittest.TestCase): @classmethod def setUpClass(cls): np.random.seed(0) cls.data_longley = sm.datasets.longley.load() cls.X = cls.data_longley.exog cls.Y = np.hstack((cls.data_longley.endog.reshape(-1, 1), cls.data_longley.endog.reshape(-1, 1))) def test_ols(self): self.model = OLS(solver='auto', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient self.assertEqual(self.model.coef.shape, (2, 7)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[ -3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355 ], [ -3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355 ]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 7)) np.testing.assert_array_almost_equal( old_div(self.model.stderr, np.sqrt(old_div(9., self.data_longley.exog.shape[0]))), np.array([[ 890420.383607373, 84.9149257747669, 0.03349, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212 ], [ 890420.383607373, 84.9149257747669, 0.03349, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212 ]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array([[92936.0061673238, 92936.0061673238], [92936.0061673238, 92936.0061673238]]), decimal=3) # predict np.testing.assert_array_almost_equal( self.Y - self.model.predict(self.X), np.hstack((np.array([ 267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783 ]).reshape(-1, 1), np.array([ 267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783 ]).reshape(-1, 1))), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike_per_sample, self.X, self.Y) def test_ols_l1_regularized(self): # sklearn elastic net and l1 does not take sample_weights, will not test pass def test_ols_l2_regularized(self): # there is a bug in sklearn with weights, it can only use list right now self.model = OLS(solver='auto', fit_intercept=True, est_stderr=True, reg_method='l2', alpha=0.1, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient np.testing.assert_array_almost_equal( self.model.coef, np.array([[ -2.0172203, -52.14364269, 0.07089677, -0.42552125, -0.57305292, -0.41272483, 48.32484052 ], [ -2.0172203, -52.14364269, 0.07089677, -0.42552125, -0.57305292, -0.41272483, 48.32484052 ]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertTrue(self.model.stderr is None) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array([[250870.081, 250870.081], [250870.081, 250870.081]]), decimal=3) # predict res = np.array([[280.31871146], [-131.6981265], [90.64414685], [-400.10244445], [-440.59604167], [-543.88595187], [200.70483416], [215.88629903], [74.9456573], [913.85128645], [424.15996133], [-9.5797488], [-360.96841852], [27.214226], [150.87705909], [-492.17489392]]) np.testing.assert_array_almost_equal(self.Y - self.model.predict(self.X), np.hstack((res, res)), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike, self.X, self.Y) def test_ols_elastic_net_regularized(self): # sklearn elastic net and l1 does not take sample_weights, will not test pass def test_ols_sample_weight_all_half(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient np.testing.assert_array_almost_equal( self.model.coef, np.array( ((-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355), (-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355))).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) np.testing.assert_array_almost_equal( old_div(self.model.stderr, np.sqrt(old_div(9., self.data_longley.exog.shape[0]))), np.array( ((890420.383607373, 84.9149257747669, 0.334910077722432E-01, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212), (890420.383607373, 84.9149257747669, 0.334910077722432E-01, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212))).reshape(2, -1), decimal=1) # scale np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array(((92936.0061673238, 92936.0061673238), (92936.0061673238, 92936.0061673238))), decimal=3) # predict res = np.array( (267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783)).reshape(-1, 1) np.testing.assert_array_almost_equal(self.Y - self.model.predict(self.X), np.hstack((res, res)), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike, self.X, self.Y) def test_ols_sample_weight_all_zero(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0) def test_ols_sample_weight_half_zero_half_one(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) len_half = 8 self.model.fit(self.X, self.Y, sample_weight=np.array( [1] * len_half + [0] * (self.data_longley.exog.shape[0] - len_half))) self.model_half = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, max_iter=100, coef=None, stderr=None, dispersion=None) self.model_half.fit(self.X[:len_half], self.Y[:len_half]) # coefficient np.testing.assert_array_almost_equal(self.model.coef, self.model_half.coef, decimal=3) # std.err np.testing.assert_array_almost_equal(self.model.stderr, self.model_half.stderr, decimal=3) # scale np.testing.assert_array_almost_equal(self.model.dispersion, self.model_half.dispersion, decimal=3) # corner cases def test_ols_one_data_point(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5) # coef self.assertEqual(self.model.coef.shape, (2, 7)) # scale np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6) # loglike_per_sample np.testing.assert_array_equal( self.model.loglike_per_sample(self.X[0:1, :], self.Y[0:1, ]), np.array([0])) np.testing.assert_array_almost_equal( self.model.loglike_per_sample( np.array(self.X[0:1, :].tolist() * 6), np.array([[60323, 60323], [0, 60323], [60323, 60323], [60322, 60323], [60322, 60322], [60323, 60323]])), np.array([0, -np.Infinity, 0, -np.Infinity, -np.Infinity, 0]), decimal=3) def test_ols_multicolinearty(self): self.model_col = OLS(solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]]) self.model_col.fit(X, self.Y, sample_weight=0.8) self.model = OLS(solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.8) # coef np.testing.assert_array_almost_equal( self.model_col.coef, np.array([[319.47969664, 319.47969664], [319.47969664, 319.47969664]]).reshape(2, -1), decimal=3) # stderr self.assertEqual(self.model_col.stderr, None) # scale np.testing.assert_array_almost_equal(self.model_col.dispersion, self.model.dispersion, decimal=3) # loglike_per_sample self.assertRaises(ValueError, self.model_col.loglike, X, self.Y) np.testing.assert_array_almost_equal(self.model_col.predict(X), self.model.predict(self.X[:, 0:1]), decimal=3)
class PerfectCorrelationMultivariateOLSTests(unittest.TestCase): @classmethod def setUpClass(cls): np.random.seed(0) cls.data_longley = sm.datasets.longley.load() cls.X = cls.data_longley.exog cls.Y = np.hstack((cls.data_longley.endog.reshape(-1, 1), cls.data_longley.endog.reshape(-1, 1))) def test_ols(self): self.model = OLS( solver='auto', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient self.assertEqual(self.model.coef.shape, (2, 7)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355], [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 7)) np.testing.assert_array_almost_equal( old_div(self.model.stderr, np.sqrt(old_div(9., self.data_longley.exog.shape[0]))), np.array([[890420.383607373, 84.9149257747669, 0.03349, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212], [890420.383607373, 84.9149257747669, 0.03349, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array([[92936.0061673238, 92936.0061673238], [92936.0061673238, 92936.0061673238]]), decimal=3) # predict np.testing.assert_array_almost_equal( self.Y - self.model.predict(self.X), np.hstack((np.array([267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783]).reshape(-1, 1), np.array([267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783]).reshape(-1, 1))), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike_per_sample, self.X, self.Y) def test_ols_l1_regularized(self): # sklearn elastic net and l1 does not take sample_weights, will not test pass def test_ols_l2_regularized(self): # there is a bug in sklearn with weights, it can only use list right now self.model = OLS( solver='auto', fit_intercept=True, est_stderr=True, reg_method='l2', alpha=0.1, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient np.testing.assert_array_almost_equal( self.model.coef, np.array([[-2.0172203, -52.14364269, 0.07089677, -0.42552125, -0.57305292, -0.41272483, 48.32484052], [-2.0172203, -52.14364269, 0.07089677, -0.42552125, -0.57305292, -0.41272483, 48.32484052]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertTrue(self.model.stderr is None) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array([[250870.081, 250870.081], [250870.081, 250870.081]]), decimal=3) # predict res = np.array([[280.31871146], [-131.6981265], [90.64414685], [-400.10244445], [-440.59604167], [-543.88595187], [200.70483416], [215.88629903], [74.9456573], [913.85128645], [424.15996133], [-9.5797488], [-360.96841852], [27.214226], [150.87705909], [-492.17489392]]) np.testing.assert_array_almost_equal( self.Y - self.model.predict(self.X), np.hstack((res, res)), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike, self.X, self.Y) def test_ols_elastic_net_regularized(self): # sklearn elastic net and l1 does not take sample_weights, will not test pass def test_ols_sample_weight_all_half(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient np.testing.assert_array_almost_equal( self.model.coef, np.array(((-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355), (-3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355))).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) np.testing.assert_array_almost_equal( old_div(self.model.stderr, np.sqrt(old_div(9., self.data_longley.exog.shape[0]))), np.array(((890420.383607373, 84.9149257747669, 0.334910077722432E-01, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212), (890420.383607373, 84.9149257747669, 0.334910077722432E-01, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212))).reshape(2, -1), decimal=1) # scale np.testing.assert_array_almost_equal( old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))), np.array(((92936.0061673238, 92936.0061673238), (92936.0061673238, 92936.0061673238))), decimal=3) # predict res = np.array((267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504, -155.54997, -85.67131, 341.93151, -206.75783)).reshape(-1, 1) np.testing.assert_array_almost_equal( self.Y - self.model.predict(self.X), np.hstack((res, res)), decimal=3) # loglike/_per_sample self.assertRaises(ValueError, self.model.loglike, self.X, self.Y) def test_ols_sample_weight_all_zero(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0) def test_ols_sample_weight_half_zero_half_one(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) len_half = 8 self.model.fit(self.X, self.Y, sample_weight=np.array([1] * len_half + [0] * (self.data_longley.exog.shape[0] - len_half))) self.model_half = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, max_iter=100, coef=None, stderr=None, dispersion=None) self.model_half.fit(self.X[:len_half], self.Y[:len_half]) # coefficient np.testing.assert_array_almost_equal( self.model.coef, self.model_half.coef, decimal=3) # std.err np.testing.assert_array_almost_equal( self.model.stderr, self.model_half.stderr, decimal=3) # scale np.testing.assert_array_almost_equal( self.model.dispersion, self.model_half.dispersion, decimal=3) # corner cases def test_ols_one_data_point(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5) # coef self.assertEqual(self.model.coef.shape, (2, 7)) # scale np.testing.assert_array_almost_equal( self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6) # loglike_per_sample np.testing.assert_array_equal(self.model.loglike_per_sample( self.X[0:1, :], self.Y[0:1, ]), np.array([0])) np.testing.assert_array_almost_equal(self.model.loglike_per_sample( np.array(self.X[0:1, :].tolist() * 6), np.array([[60323, 60323], [0, 60323], [60323, 60323], [60322, 60323], [60322, 60322], [60323, 60323]])), np.array([0, -np.Infinity, 0, -np.Infinity, -np.Infinity, 0]), decimal=3) def test_ols_multicolinearty(self): self.model_col = OLS( solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]]) self.model_col.fit(X, self.Y, sample_weight=0.8) self.model = OLS( solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.8) # coef np.testing.assert_array_almost_equal( self.model_col.coef, np.array([[319.47969664, 319.47969664], [319.47969664, 319.47969664]]).reshape(2, -1), decimal=3) # stderr self.assertEqual(self.model_col.stderr, None) # scale np.testing.assert_array_almost_equal( self.model_col.dispersion, self.model.dispersion, decimal=3) # loglike_per_sample self.assertRaises(ValueError, self.model_col.loglike, X, self.Y) np.testing.assert_array_almost_equal( self.model_col.predict(X), self.model.predict(self.X[:, 0:1]), decimal=3)
class IndependentMultivariateOLSTests(unittest.TestCase): @classmethod def setUpClass(cls): np.random.seed(0) cls.X = np.random.normal(size=(1000, 1)) cls.Y = np.random.normal(size=(cls.X.shape[0], 2)) def test_ols(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.02924966, -0.03484827], [-0.00978688, 0.00336316]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.stderr, np.array([[0.03083908, 0.03121143], [0.03002101, 0.03038348]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual(self.model.loglike(self.X, self.Y), -2758.54387369, places=3) # to_json json_dict = self.model.to_json( './tests/linear_models/OLS/MultivariateOLS/') self.assertEqual(json_dict['properties']['solver'], 'pinv') # from_json self.model_from_json = OLS.from_json(json_dict) np.testing.assert_array_almost_equal(self.model.coef, self.model_from_json.coef, decimal=3) np.testing.assert_array_almost_equal(self.model.stderr, self.model_from_json.stderr, decimal=3) np.testing.assert_array_almost_equal(self.model.dispersion, self.model_from_json.dispersion, decimal=3) def test_ols_l2_regularized(self): self.model = OLS(solver='auto', fit_intercept=True, est_stderr=True, reg_method='l2', alpha=0.1, l1_ratio=1, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.0292465, -0.03484456], [-0.00978591, 0.00336286]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertTrue(self.model.stderr is None) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual(self.model.loglike(self.X, self.Y), -2758.5438737, places=3) def test_ols_l1_regularized(self): # sklearn l1 and elstic net does not support sample weight pass def test_ols_sample_weight_all_half(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.02924966, -0.03484827], [-0.00978688, 0.00336316]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.stderr, np.array([[0.03083908, 0.03121143], [0.03002101, 0.03038348]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual(self.model.loglike(self.X, self.Y, 0.5), old_div(-2758.54387369, 2.), places=3) self.assertEqual( self.model.loglike_per_sample(self.X, self.Y).shape, (1000, )) def test_ols_sample_weight_all_zero(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0) def test_ols_sample_weight_half_zero_half_one(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) len_half = 500 self.model.fit(self.X, self.Y, sample_weight=np.array([1] * len_half + [0] * (self.X.shape[0] - len_half))) self.model_half = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, max_iter=100, coef=None, stderr=None, dispersion=None) self.model_half.fit(self.X[:len_half], self.Y[:len_half]) # coefficient np.testing.assert_array_almost_equal(self.model.coef, self.model_half.coef, decimal=3) # std.err np.testing.assert_array_almost_equal(self.model.stderr, self.model_half.stderr, decimal=3) # scale np.testing.assert_array_almost_equal(self.model.dispersion, self.model_half.dispersion, decimal=3) # corner cases def test_ols_one_data_point(self): self.model = OLS(solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5) # coef self.assertEqual(self.model.coef.shape, (2, 2)) # scale np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6) # loglike_per_sample np.testing.assert_array_equal( self.model.loglike_per_sample(self.X[0:1, :], self.Y[0:1, ]), np.array([0])) np.testing.assert_array_almost_equal( self.model.loglike_per_sample( np.array(self.X[0:1, :].tolist() * 6), np.array([ self.Y[0, ], self.Y[1, ], self.Y[0, ], self.Y[1, ], self.Y[1, ], self.Y[0, ] ])), np.array([0, -np.Infinity, 0, -np.Infinity, -np.Infinity, 0]), decimal=3) def test_ols_multicolinearty(self): self.model_col = OLS(solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]]) self.model_col.fit(X, self.Y, sample_weight=0.5) self.model = OLS(solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.5) # stderr self.assertEqual(self.model_col.stderr, None) # scale np.testing.assert_array_almost_equal(self.model_col.dispersion, self.model.dispersion, decimal=3) # loglike_per_sample np.testing.assert_array_almost_equal( self.model_col.loglike_per_sample(X, self.Y), self.model.loglike_per_sample(self.X[:, 0:1], self.Y), decimal=0) np.testing.assert_array_almost_equal(self.model_col.predict(X), self.model.predict(self.X[:, 0:1]), decimal=1)
class IndependentMultivariateOLSTests(unittest.TestCase): @classmethod def setUpClass(cls): np.random.seed(0) cls.X = np.random.normal(size=(1000, 1)) cls.Y = np.random.normal(size=(cls.X.shape[0], 2)) def test_ols(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient print(self.model.coef) print(self.model.dispersion) print(self.model.stderr) print(self.Y - self.model.predict(self.X)) print(self.model.loglike(self.X, self.Y)) self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.02924966, -0.03484827], [-0.00978688, 0.00336316]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.stderr, np.array([[0.03083908, 0.03121143], [0.03002101, 0.03038348]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual( self.model.loglike(self.X, self.Y), -2758.54387369, places=3) # to_json json_dict = self.model.to_json('./tests/linear_models/OLS/MultivariateOLS/') self.assertEqual(json_dict['properties']['solver'], 'pinv') # from_json self.model_from_json = OLS.from_json(json_dict) np.testing.assert_array_almost_equal( self.model.coef, self.model_from_json.coef, decimal=3) np.testing.assert_array_almost_equal( self.model.stderr, self.model_from_json.stderr, decimal=3) np.testing.assert_array_almost_equal( self.model.dispersion, self.model_from_json.dispersion, decimal=3) def test_ols_l2_regularized(self): self.model = OLS( solver='auto', fit_intercept=True, est_stderr=True, reg_method='l2', alpha=0.1, l1_ratio=1, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y) # coefficient print(self.model.coef) print(self.model.dispersion) print(self.model.loglike(self.X, self.Y)) self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.0292465, -0.03484456], [-0.00978591, 0.00336286]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertTrue(self.model.stderr is None) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual( self.model.loglike(self.X, self.Y), -2758.5438737, places=3) def test_ols_l1_regularized(self): # sklearn l1 and elstic net does not support sample weight pass def test_ols_sample_weight_all_half(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X, self.Y, sample_weight=0.5) # coefficient print(self.model.coef) print(self.model.dispersion) print(self.model.stderr) print(self.model.loglike(self.X, self.Y, sample_weight=0.5)) self.assertEqual(self.model.coef.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.coef, np.array([[-0.02924966, -0.03484827], [-0.00978688, 0.00336316]]).reshape(2, -1), decimal=3) # std.err of coefficient (calibrated by df_resid) self.assertEqual(self.model.stderr.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.stderr, np.array([[0.03083908, 0.03121143], [0.03002101, 0.03038348]]).reshape(2, -1), decimal=2) # scale self.assertEqual(self.model.dispersion.shape, (2, 2)) np.testing.assert_array_almost_equal( self.model.dispersion, np.array([[0.94905363, 0.0164185], [0.0164185, 0.89937019]]), decimal=3) # loglike/_per_sample self.assertAlmostEqual( self.model.loglike(self.X, self.Y, 0.5), old_div(-2758.54387369, 2.), places=3) self.assertEqual( self.model.loglike_per_sample(self.X, self.Y).shape, (1000, )) def test_ols_sample_weight_all_zero(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0) def test_ols_sample_weight_half_zero_half_one(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) len_half = 500 self.model.fit(self.X, self.Y, sample_weight=np.array([1] * len_half + [0] * (self.X.shape[0] - len_half))) self.model_half = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, max_iter=100, coef=None, stderr=None, dispersion=None) self.model_half.fit(self.X[:len_half], self.Y[:len_half]) # coefficient np.testing.assert_array_almost_equal( self.model.coef, self.model_half.coef, decimal=3) # std.err np.testing.assert_array_almost_equal( self.model.stderr, self.model_half.stderr, decimal=3) # scale np.testing.assert_array_almost_equal( self.model.dispersion, self.model_half.dispersion, decimal=3) # corner cases def test_ols_one_data_point(self): self.model = OLS( solver='pinv', fit_intercept=True, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5) # coef self.assertEqual(self.model.coef.shape, (2, 2)) # scale np.testing.assert_array_almost_equal( self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6) # loglike_per_sample np.testing.assert_array_equal(self.model.loglike_per_sample( self.X[0:1, :], self.Y[0:1, ]), np.array([0])) np.testing.assert_array_almost_equal(self.model.loglike_per_sample( np.array(self.X[0:1, :].tolist() * 6), np.array([self.Y[0, ], self.Y[1, ], self.Y[0, ], self.Y[1, ], self.Y[1, ], self.Y[0, ]])), np.array([0, -np.Infinity, 0, -np.Infinity, -np.Infinity, 0]), decimal=3) def test_ols_multicolinearty(self): self.model_col = OLS( solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]]) self.model_col.fit(X, self.Y, sample_weight=0.5) self.model = OLS( solver='pinv', fit_intercept=False, est_stderr=True, reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100, coef=None, stderr=None, dispersion=None) self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.5) # stderr self.assertEqual(self.model_col.stderr, None) # scale np.testing.assert_array_almost_equal( self.model_col.dispersion, self.model.dispersion, decimal=3) # loglike_per_sample np.testing.assert_array_almost_equal( self.model_col.loglike_per_sample(X, self.Y), self.model.loglike_per_sample(self.X[:, 0:1], self.Y), decimal=0) np.testing.assert_array_almost_equal( self.model_col.predict(X), self.model.predict(self.X[:, 0:1]), decimal=1)