Python OLSの例、IOHMM.OLS Pythonの例

コード例 #1

1

ファイルを表示

    def test_ols(self):
        """Fit an unregularized OLS on ``self.data_longley`` and check its outputs.

        Checks the shapes and values of the coefficients, standard errors and
        dispersion against hard-coded reference numbers, then the residuals of
        ``predict``, the log-likelihood, and a ``to_json`` / ``from_json``
        round trip.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.data_longley.exog, self.data_longley.endog)
        # coefficient
        self.assertEqual(self.model.coef.shape, (1, 7))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                      -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                      1829.15146461355]).reshape(1, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (1, 7))
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr, np.sqrt(old_div(9., self.data_longley.exog.shape[0]))),
            np.array([890420.383607373, 84.9149257747669, 0.03349,
                      0.488399681651699, 0.214274163161675, 0.226073200069370,
                      455.478499142212]).reshape(1, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (1, 1))
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))),
            np.array([[92936.0061673238]]),
            decimal=3)
        # predict
        np.testing.assert_array_almost_equal(
            self.data_longley.endog.reshape(-1, 1) - self.model.predict(self.data_longley.exog),
            np.array([267.34003, -94.01394, 46.28717, -410.11462,
                      309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409,
                      -17.26893, -39.05504, -155.54997, -85.67131, 341.93151,
                      -206.75783]).reshape(-1, 1),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_longley.exog, self.data_longley.endog),
            -109.61743480849013,
            places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/OLS/UnivariateOLS/')
        self.assertEqual(json_dict['properties']['solver'], 'pinv')

        # from_json
        self.model_from_json = OLS.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_from_json.coef,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            self.model_from_json.stderr,
            decimal=3)
        # Exact comparison, done with the array-aware helper: assertEqual on
        # numpy arrays depends on the truthiness of an element-wise result.
        np.testing.assert_array_equal(
            self.model.dispersion,
            self.model_from_json.dispersion)

コード例 #2

0

ファイルを表示

 def test_ols_multicolinearty(self):
     """Compare a fit on a duplicated column with the single-column fit.

     ``X`` stacks the first column of ``self.X`` next to itself, so the two
     regressors are perfectly collinear.  The test expects no standard
     errors for that model, and a dispersion, per-sample log-likelihood and
     predictions that agree with a model fitted on the single column.
     """
     self.model_col = OLS(
         solver='pinv', fit_intercept=False, est_stderr=True,
         reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None,  dispersion=None)
     X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
     self.model_col.fit(X,
                        self.Y, sample_weight=0.5)
     self.model = OLS(
         solver='pinv', fit_intercept=False, est_stderr=True,
         reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None,  dispersion=None)
     self.model.fit(self.X[:, 0:1],
                    self.Y, sample_weight=0.5)
     # stderr
     # Identity check: assertEqual(..., None) would fall back to an
     # element-wise comparison if stderr were ever an array.
     self.assertIsNone(self.model_col.stderr)
     # scale
     np.testing.assert_array_almost_equal(
         self.model_col.dispersion, self.model.dispersion, decimal=3)
     # loglike_per_sample
     np.testing.assert_array_almost_equal(
         self.model_col.loglike_per_sample(X, self.Y),
         self.model.loglike_per_sample(self.X[:, 0:1],
                                       self.Y), decimal=0)
     np.testing.assert_array_almost_equal(
         self.model_col.predict(X),
         self.model.predict(self.X[:, 0:1]), decimal=1)

コード例 #3

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

 def test_ols_one_data_point(self):
     """Fit on a single observation and check the degenerate outputs.

     The model is fitted on the first row of ``self.X`` / ``self.Y`` only.
     The test expects a (2, 7) coefficient matrix, an all-zero 2x2
     dispersion, a per-sample log-likelihood of 0 on the training row and,
     on six replicated rows, either 0 or ``-inf`` per row as listed in the
     expected array.
     """
     self.model = OLS(solver='pinv',
                      fit_intercept=True,
                      est_stderr=True,
                      reg_method=None,
                      alpha=0,
                      l1_ratio=0,
                      tol=1e-4,
                      max_iter=100,
                      coef=None,
                      stderr=None,
                      dispersion=None)
     self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (2, 7))
     # scale
     np.testing.assert_array_almost_equal(self.model.dispersion,
                                          np.array([[0, 0], [0, 0]]),
                                          decimal=6)
     # loglike_per_sample
     np.testing.assert_array_equal(
         self.model.loglike_per_sample(self.X[0:1, :], self.Y[0:1, ]),
         np.array([0]))
     # np.inf rather than np.Infinity: the alias was removed in NumPy 2.0.
     np.testing.assert_array_almost_equal(
         self.model.loglike_per_sample(
             np.array(self.X[0:1, :].tolist() * 6),
             np.array([[60323, 60323], [0, 60323], [60323, 60323],
                       [60322, 60323], [60322, 60322], [60323, 60323]])),
         np.array([0, -np.inf, 0, -np.inf, -np.inf, 0]),
         decimal=3)

コード例 #4

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

 def test_ols_l2_regularized(self):
     """Check an l2-regularized OLS fitted on ``self.X`` / ``self.Y``.

     Compares the 2x2 coefficients and dispersion and the log-likelihood
     with hard-coded reference values, and expects ``stderr`` to be None.
     """
     self.model = OLS(
         solver='auto', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0.1, l1_ratio=1, tol=1e-4, max_iter=100,
         coef=None, stderr=None, dispersion=None)
     self.model.fit(self.X, self.Y)

     # coefficient
     self.assertEqual(self.model.coef.shape, (2, 2))
     expected_coef = np.array([[-0.0292465, -0.03484456],
                               [-0.00978591, 0.00336286]]).reshape(2, -1)
     np.testing.assert_array_almost_equal(
         self.model.coef, expected_coef, decimal=3)

     # std.err of coefficient (calibrated by df_resid)
     self.assertTrue(self.model.stderr is None)

     # scale
     self.assertEqual(self.model.dispersion.shape, (2, 2))
     expected_dispersion = np.array([[0.94905363, 0.0164185],
                                     [0.0164185, 0.89937019]])
     np.testing.assert_array_almost_equal(
         self.model.dispersion, expected_dispersion, decimal=3)

     # loglike/_per_sample
     loglike = self.model.loglike(self.X, self.Y)
     self.assertAlmostEqual(loglike, -2758.5438737, places=3)

コード例 #5

0

ファイルを表示

 def test_ols_multicolinearty(self):
     """Compare a fit on a duplicated column with the single-column fit.

     ``X`` stacks the first column of ``self.X`` next to itself.  The test
     checks the coefficients of that model against reference values,
     expects no standard errors, a dispersion and predictions matching the
     single-column model, and a ``ValueError`` from ``loglike``.
     """
     self.model_col = OLS(
         solver='pinv', fit_intercept=False, est_stderr=True,
         reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None,  dispersion=None)
     X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
     self.model_col.fit(X,
                        self.Y, sample_weight=0.8)
     self.model = OLS(
         solver='pinv', fit_intercept=False, est_stderr=True,
         reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None,  dispersion=None)
     self.model.fit(self.X[:, 0:1],
                    self.Y, sample_weight=0.8)
     # coef
     np.testing.assert_array_almost_equal(
         self.model_col.coef, np.array([[319.47969664, 319.47969664],
                                        [319.47969664, 319.47969664]]).reshape(2, -1), decimal=3)
     # stderr
     # Identity check: assertEqual(..., None) would fall back to an
     # element-wise comparison if stderr were ever an array.
     self.assertIsNone(self.model_col.stderr)
     # scale
     np.testing.assert_array_almost_equal(
         self.model_col.dispersion, self.model.dispersion, decimal=3)
     # loglike_per_sample
     self.assertRaises(ValueError,
                       self.model_col.loglike, X, self.Y)
     np.testing.assert_array_almost_equal(
         self.model_col.predict(X),
         self.model.predict(self.X[:, 0:1]), decimal=3)

コード例 #6

0

ファイルを表示

 def test_ols_sample_weight_all_zero(self):
     """Fitting with a sample weight of 0 is expected to raise ValueError."""
     self.model = OLS(
         solver='pinv', fit_intercept=True, est_stderr=True,
         reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, dispersion=None)
     # Resolve the callable and its arguments first so that only the call
     # itself runs inside the assertRaises context.
     fit = self.model.fit
     exog = self.data_longley.exog
     endog = self.data_longley.endog
     with self.assertRaises(ValueError):
         fit(exog, endog, 0)

コード例 #7

0

ファイルを表示

    def test_ols_sample_weight_half_zero_half_one(self):
        """Check that zero-weighted samples do not influence the fit.

        One model is fitted on all of ``self.X`` / ``self.Y`` with weight 1 on
        the first ``len_half`` rows and weight 0 on the rest; a second model
        is fitted on the first ``len_half`` rows only.  Coefficients, standard
        errors and dispersion of the two models are expected to agree.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        len_half = 8
        # Size the weight vector from the data that is actually fitted, so it
        # always has one entry per row of self.X.
        n_samples = self.X.shape[0]
        sample_weight = np.array([1] * len_half + [0] * (n_samples - len_half))
        self.model.fit(self.X, self.Y, sample_weight=sample_weight)
        # tol is passed explicitly so both models share the same configuration.
        self.model_half = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model_half.fit(self.X[:len_half], self.Y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_half.coef,
            decimal=3)
        # std.err
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            self.model_half.stderr,
            decimal=3)

        # scale
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            self.model_half.dispersion,
            decimal=3)

コード例 #8

0

ファイルを表示

    def test_ols(self):
        """Fit an unregularized OLS on ``self.X`` / ``self.Y`` and check it.

        Prints the fitted quantities, compares coefficients, standard errors,
        dispersion and log-likelihood with reference values, and verifies a
        ``to_json`` / ``from_json`` round trip.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model.fit(self.X, self.Y)

        # coefficient
        print(self.model.coef)
        print(self.model.dispersion)
        print(self.model.stderr)
        print(self.Y - self.model.predict(self.X))
        print(self.model.loglike(self.X, self.Y))

        self.assertEqual(self.model.coef.shape, (2, 2))
        expected_coef = np.array([[-0.02924966, -0.03484827],
                                  [-0.00978688, 0.00336316]]).reshape(2, -1)
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 2))
        expected_stderr = np.array([[0.03083908, 0.03121143],
                                    [0.03002101, 0.03038348]]).reshape(2, -1)
        np.testing.assert_array_almost_equal(
            self.model.stderr, expected_stderr, decimal=2)

        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        expected_dispersion = np.array([[0.94905363, 0.0164185],
                                        [0.0164185, 0.89937019]])
        np.testing.assert_array_almost_equal(
            self.model.dispersion, expected_dispersion, decimal=3)

        # loglike/_per_sample
        loglike = self.model.loglike(self.X, self.Y)
        self.assertAlmostEqual(loglike, -2758.54387369, places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/OLS/MultivariateOLS/')
        self.assertEqual(json_dict['properties']['solver'], 'pinv')

        # from_json
        self.model_from_json = OLS.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.stderr, self.model_from_json.stderr, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.dispersion, self.model_from_json.dispersion, decimal=3)

コード例 #9

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

    def test_ols_l2_regularized(self):
        """Check an l2-regularized OLS fitted on ``self.data_longley``.

        The model is fitted with a scalar sample weight of 0.5.  Coefficients,
        rescaled dispersion, residuals and log-likelihood are compared with
        reference values; ``stderr`` is expected to be None and the per-sample
        log-likelihood to have shape (16,).
        """
        # there is a bug in sklearn with weights, it can only use list right now
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0.1, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        exog = self.data_longley.exog
        endog = self.data_longley.endog
        self.model.fit(exog, endog, sample_weight=0.5)

        # coefficient
        expected_coef = np.array([
            -2.0172203, -52.14364269, 0.07089677, -0.42552125,
            -0.57305292, -0.41272483, 48.32484052]).reshape(1, -1)
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        self.assertTrue(self.model.stderr is None)

        # scale
        self.assertEqual(self.model.dispersion.shape, (1, 1))
        rescaled_dispersion = old_div(self.model.dispersion,
                                      old_div(9., exog.shape[0]))
        np.testing.assert_array_almost_equal(
            rescaled_dispersion, np.array([[250870.081]]), decimal=3)

        # predict
        residuals = endog.reshape(-1, 1) - self.model.predict(exog)
        expected_residuals = np.array([
            280.31871146, -131.6981265, 90.64414685, -400.10244445,
            -440.59604167, -543.88595187, 200.70483416, 215.88629903,
            74.9456573, 913.85128645, 424.15996133, -9.5797488,
            -360.96841852, 27.214226, 150.87705909,
            -492.17489392]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            residuals, expected_residuals, decimal=3)

        # loglike/_per_sample
        loglike = self.model.loglike(exog, endog)
        self.assertAlmostEqual(loglike, -117.561627187, places=3)

        per_sample = self.model.loglike_per_sample(exog, endog)
        self.assertEqual(per_sample.shape, (16, ))

コード例 #10

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

    def test_ols_sample_weight_all_half(self):
        """Check an OLS fitted on ``self.data_longley`` with weight 0.5.

        Coefficients, rescaled standard errors and dispersion, residuals and
        log-likelihood are compared with reference values, and the per-sample
        log-likelihood is expected to have shape (16,).
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        exog = self.data_longley.exog
        endog = self.data_longley.endog
        self.model.fit(exog, endog, sample_weight=0.5)

        # coefficient
        expected_coef = np.array([
            -3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
            -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
            1829.15146461355]).reshape(1, -1)
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        rescaled_stderr = old_div(self.model.stderr,
                                  np.sqrt(old_div(9., exog.shape[0])))
        expected_stderr = np.array([
            890420.383607373, 84.9149257747669, 0.334910077722432E-01,
            0.488399681651699, 0.214274163161675, 0.226073200069370,
            455.478499142212]).reshape(1, -1)
        np.testing.assert_array_almost_equal(
            rescaled_stderr, expected_stderr, decimal=1)

        # scale
        rescaled_dispersion = old_div(self.model.dispersion,
                                      old_div(9., exog.shape[0]))
        np.testing.assert_array_almost_equal(
            rescaled_dispersion, np.array(92936.0061673238))

        # predict
        residuals = endog.reshape(-1, 1) - self.model.predict(exog)
        expected_residuals = np.array([
            267.34003, -94.01394, 46.28717, -410.11462, 309.71459,
            -249.31122, -164.04896, -13.18036, 14.30477, 455.39409,
            -17.26893, -39.05504, -155.54997, -85.67131, 341.93151,
            -206.75783]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            residuals, expected_residuals, decimal=3)

        # loglike/_per_sample
        loglike = self.model.loglike(exog, endog)
        self.assertAlmostEqual(loglike, -109.61743480849013, places=3)

        per_sample = self.model.loglike_per_sample(exog, endog)
        self.assertEqual(per_sample.shape, (16, ))

コード例 #11

0

ファイルを表示

 def test_ols(self):
     """Fit an OLS with two output columns on ``self.X`` / ``self.Y``.

     The expected coefficients, rescaled standard errors and residuals are
     the same reference row repeated for both outputs.  The per-sample
     log-likelihood is expected to raise ``ValueError``.
     """
     self.model = OLS(
         solver='auto', fit_intercept=True, est_stderr=True,
         reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, dispersion=None)
     self.model.fit(self.X, self.Y)

     # coefficient
     self.assertEqual(self.model.coef.shape, (2, 7))
     coef_row = [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                 -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                 1829.15146461355]
     np.testing.assert_array_almost_equal(
         self.model.coef,
         np.array([coef_row, coef_row]).reshape(2, -1),
         decimal=3)

     # std.err of coefficient (calibrated by df_resid)
     self.assertEqual(self.model.stderr.shape, (2, 7))
     rescaled_stderr = old_div(
         self.model.stderr,
         np.sqrt(old_div(9., self.data_longley.exog.shape[0])))
     stderr_row = [890420.383607373, 84.9149257747669, 0.03349,
                   0.488399681651699, 0.214274163161675, 0.226073200069370,
                   455.478499142212]
     np.testing.assert_array_almost_equal(
         rescaled_stderr,
         np.array([stderr_row, stderr_row]).reshape(2, -1),
         decimal=2)

     # scale
     self.assertEqual(self.model.dispersion.shape, (2, 2))
     rescaled_dispersion = old_div(
         self.model.dispersion,
         old_div(9., self.data_longley.exog.shape[0]))
     np.testing.assert_array_almost_equal(
         rescaled_dispersion,
         np.array([[92936.0061673238, 92936.0061673238],
                   [92936.0061673238, 92936.0061673238]]),
         decimal=3)

     # predict
     residuals = self.Y - self.model.predict(self.X)
     residual_values = [267.34003, -94.01394, 46.28717, -410.11462,
                        309.71459, -249.31122, -164.04896, -13.18036,
                        14.30477, 455.39409, -17.26893, -39.05504,
                        -155.54997, -85.67131, 341.93151, -206.75783]
     expected_residuals = np.hstack(
         (np.array(residual_values).reshape(-1, 1),
          np.array(residual_values).reshape(-1, 1)))
     np.testing.assert_array_almost_equal(
         residuals, expected_residuals, decimal=3)

     # loglike/_per_sample
     loglike_per_sample = self.model.loglike_per_sample
     X, Y = self.X, self.Y
     with self.assertRaises(ValueError):
         loglike_per_sample(X, Y)

コード例 #12

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

    def test_ols_l2_regularized(self):
        """Check an l2-regularized OLS fitted on ``self.X`` / ``self.Y``.

        Compares the 2x2 coefficients and dispersion and the log-likelihood
        with hard-coded reference values, and expects ``stderr`` to be None.
        """
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0.1, l1_ratio=1,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X, self.Y)
        # Debug prints were dropped: every printed quantity is asserted below.

        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.0292465, -0.03484456],
                      [-0.00978591, 0.00336286]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertTrue(self.model.stderr is None)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.X, self.Y),
            -2758.5438737,
            places=3)

コード例 #13

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

    def test_ols_l2_regularized(self):
        """Check an l2-regularized OLS fitted on ``self.data_longley``.

        The model is fitted with a scalar sample weight of 0.5.  Coefficients,
        rescaled dispersion, residuals and log-likelihood are compared with
        reference values; ``stderr`` is expected to be None and the per-sample
        log-likelihood to have shape (16,).
        """
        # there is a bug in sklearn with weights, it can only use list right now
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0.1, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.data_longley.exog, self.data_longley.endog, sample_weight=0.5)
        # Debug prints were dropped: every printed quantity is asserted below.

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([-2.0172203, -52.14364269, 0.07089677, -0.42552125,
                      -0.57305292, -0.41272483, 48.32484052]).reshape(1, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertTrue(self.model.stderr is None)
        # scale
        self.assertEqual(self.model.dispersion.shape, (1, 1))
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, (old_div(9., self.data_longley.exog.shape[0]))),
            np.array([[250870.081]]),
            decimal=3)
        # predict
        np.testing.assert_array_almost_equal(
            self.data_longley.endog.reshape(-1, 1) - self.model.predict(self.data_longley.exog),
            np.array([[280.31871146],
                      [-131.6981265],
                      [90.64414685],
                      [-400.10244445],
                      [-440.59604167],
                      [-543.88595187],
                      [200.70483416],
                      [215.88629903],
                      [74.9456573],
                      [913.85128645],
                      [424.15996133],
                      [-9.5797488],
                      [-360.96841852],
                      [27.214226],
                      [150.87705909],
                      [-492.17489392]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_longley.exog, self.data_longley.endog),
            -117.561627187,
            places=3)

        self.assertEqual(
            self.model.loglike_per_sample(self.data_longley.exog, self.data_longley.endog).shape,
            (16, ))

コード例 #14

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

    def test_ols_sample_weight_all_half(self):
        """Check an OLS fitted on ``self.X`` / ``self.Y`` with weight 0.5.

        Coefficients, standard errors and dispersion are compared with
        reference values.  The log-likelihood evaluated with weight 0.5 is
        expected to be half of the reference value, and the per-sample
        log-likelihood to have shape (1000,).
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)

        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        expected_coef = np.array([[-0.02924966, -0.03484827],
                                  [-0.00978688, 0.00336316]]).reshape(2, -1)
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 2))
        expected_stderr = np.array([[0.03083908, 0.03121143],
                                    [0.03002101, 0.03038348]]).reshape(2, -1)
        np.testing.assert_array_almost_equal(
            self.model.stderr, expected_stderr, decimal=2)

        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        expected_dispersion = np.array([[0.94905363, 0.0164185],
                                        [0.0164185, 0.89937019]])
        np.testing.assert_array_almost_equal(
            self.model.dispersion, expected_dispersion, decimal=3)

        # loglike/_per_sample
        weighted_loglike = self.model.loglike(self.X, self.Y, 0.5)
        self.assertAlmostEqual(
            weighted_loglike, old_div(-2758.54387369, 2.), places=3)

        per_sample = self.model.loglike_per_sample(self.X, self.Y)
        self.assertEqual(per_sample.shape, (1000, ))

コード例 #15

0

ファイルを表示

ファイル: test_UnSupervisedIOHMM.py プロジェクト: vishalbelsare/IOHMM

    def test_train_no_covariates(self):
        """Train a two-state IOHMM without covariates and check the estimates.

        Uses lbfgs-solved, l2-regularized ``CrossEntropyMNL`` models for the
        initial and transition parts and a default ``OLS`` emission on the
        'rt' output of ``self.data_speed``.  Emission coefficients and
        dispersions and the transition coefficients and probabilities are
        compared with reference values.
        """
        self.model = UnSupervisedIOHMM(
            num_states=2, max_EM_iter=100, EM_tol=1e-6)
        initial_model = CrossEntropyMNL(solver='lbfgs', reg_method='l2')
        transition_model = CrossEntropyMNL(solver='lbfgs', reg_method='l2')
        self.model.set_models(model_initial=initial_model,
                              model_transition=transition_model,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=[],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([self.data_speed])
        self.model.train()

        # emission coefficients
        for state, expected in ((0, 5.5), (1, 6.4)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected]]),
                decimal=1)

        # emission dispersion
        for state, expected in ((0, 0.037), (1, 0.063)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected]]),
                decimal=2)

        # transition
        for state, expected in ((1, 2.4), (0, -2)):
            np.testing.assert_array_almost_equal(
                self.model.model_transition[state].coef,
                np.array([[expected]]),
                decimal=1)
        for state, expected in ((1, [0.08, 0.92]), (0, [0.88, 0.12])):
            log_proba = self.model.model_transition[state].predict_log_proba(
                np.array([[]]))
            np.testing.assert_array_almost_equal(
                np.exp(log_proba), np.array([expected]), decimal=2)

コード例 #16

0

ファイルを表示

ファイル: test_UnSupervisedIOHMM.py プロジェクト: vishalbelsare/IOHMM

    def test_train_covariates_for_transition(self):
        """Train a two-state IOHMM with 'Pacc' as the transition covariate.

        Uses newton-cg-solved, l2-regularized ``CrossEntropyMNL`` models for
        the initial and transition parts and a default ``OLS`` emission on
        the 'rt' output of ``self.data_speed``.  Emission coefficients and
        dispersions and the column sums of the transition probabilities are
        compared with reference values.
        """
        self.model = UnSupervisedIOHMM(
            num_states=2, max_EM_iter=100, EM_tol=1e-6)
        initial_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        transition_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        self.model.set_models(model_initial=initial_model,
                              model_transition=transition_model,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=['Pacc'],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([self.data_speed])
        self.model.train()

        # emission coefficients
        for state, expected in ((0, 5.5), (1, 6.4)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected]]),
                decimal=1)

        # emission dispersion
        for state, expected in ((0, 0.036), (1, 0.063)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected]]),
                decimal=2)

        # transition
        for state, expected in ((0, [312, 126]), (1, [112, 326])):
            log_proba = self.model.model_transition[state].predict_log_proba(
                self.model.inp_transitions_all_sequences)
            np.testing.assert_array_almost_equal(
                np.exp(log_proba).sum(axis=0),
                np.array(expected),
                decimal=0)

コード例 #17

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

 def test_ols_one_data_point(self):
     """Fit on a single observation and check the degenerate outputs.

     The model is fitted on the first row of ``self.data_longley`` only.
     The test expects a (1, 7) coefficient matrix, a zero dispersion, a
     per-sample log-likelihood of 0 on the training row and, on six
     replicated rows, either 0 or ``-inf`` per row as listed in the
     expected array.
     """
     self.model = OLS(
         solver='pinv', fit_intercept=True, est_stderr=True,
         reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None,  dispersion=None)
     self.model.fit(self.data_longley.exog[0:1, :],
                    self.data_longley.endog[0:1, ], sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (1, 7))
     # scale
     np.testing.assert_array_almost_equal(self.model.dispersion, np.array([[0]]))
     # loglike_per_sample
     np.testing.assert_array_equal(self.model.loglike_per_sample(
         self.data_longley.exog[0:1, :], self.data_longley.endog[0:1, ]), np.array([0]))
     # np.inf rather than np.Infinity: the alias was removed in NumPy 2.0.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         np.array(self.data_longley.exog[0:1, :].tolist() * 6),
         np.array([60323, 0, 60323, 60322, 60322, 60323])),
         np.array([0, -np.inf, 0, -np.inf, -np.inf, 0]), decimal=3)

コード例 #18

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

    def test_ols_sample_weight_all_half(self):
        """Check an OLS fitted on ``self.X`` / ``self.Y`` with weight 0.5.

        Coefficients, standard errors and dispersion are compared with
        reference values.  The log-likelihood evaluated with weight 0.5 is
        expected to be half of the reference value, and the per-sample
        log-likelihood to have shape (1000,).
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)
        # Debug prints were dropped: every printed quantity is asserted below.

        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.02924966, -0.03484827],
                      [-0.00978688, 0.00336316]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            np.array([[0.03083908, 0.03121143],
                      [0.03002101, 0.03038348]]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.X, self.Y, 0.5),
            old_div(-2758.54387369, 2.),
            places=3)

        self.assertEqual(
            self.model.loglike_per_sample(self.X, self.Y).shape,
            (1000, ))

コード例 #19

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

    def test_ols_sample_weight_all_half(self):
        """Fit OLS on the Longley data with every sample weighted 0.5.

        Coefficients, rescaled standard errors, rescaled dispersion and
        residuals are compared against fixed reference values; afterwards
        the log-likelihood value and the shape of the per-sample
        log-likelihood are checked.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        exog = self.data_longley.exog
        endog = self.data_longley.endog
        self.model.fit(exog, endog, sample_weight=0.5)

        # Factor 9 / n_obs applied to stderr (square-rooted) and dispersion
        # before comparing with the reference values.
        rescale = old_div(9., exog.shape[0])

        # coefficient
        expected_coef = np.array([[
            -3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
            -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
            1829.15146461355,
        ]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        expected_stderr = np.array([[
            890420.383607373, 84.9149257747669, 0.334910077722432E-01,
            0.488399681651699, 0.214274163161675, 0.226073200069370,
            455.478499142212,
        ]])
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr, np.sqrt(rescale)),
            expected_stderr,
            decimal=1)

        # scale
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, rescale),
            np.array(92936.0061673238))

        # predict
        expected_residuals = np.array([
            267.34003, -94.01394, 46.28717, -410.11462,
            309.71459, -249.31122, -164.04896, -13.18036,
            14.30477, 455.39409, -17.26893, -39.05504,
            -155.54997, -85.67131, 341.93151, -206.75783,
        ]).reshape(-1, 1)
        residuals = endog.reshape(-1, 1) - self.model.predict(exog)
        np.testing.assert_array_almost_equal(
            residuals, expected_residuals, decimal=3)

        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(exog, endog),
            -109.61743480849013,
            places=3)

        per_sample = self.model.loglike_per_sample(exog, endog)
        self.assertEqual(per_sample.shape, (16, ))

コード例 #20

0

ファイルを表示

ファイル: test_UnSupervisedIOHMM.py プロジェクト: vishalbelsare/IOHMM

    def test_train_multivariate(self):
        """Train a two-state UnSupervisedIOHMM with two emission models.

        After training on ``self.data_speed`` the emission coefficients,
        emission dispersions and summed transition probabilities are
        compared with reference values, and the model is exported through
        ``to_json`` and dumped to ``model.json``.
        """
        hmm = UnSupervisedIOHMM(num_states=2, max_EM_iter=100, EM_tol=1e-6)
        self.model = hmm

        initial_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        transition_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        emission_models = [OLS(), DiscreteMNL(reg_method='l2')]
        hmm.set_models(model_initial=initial_model,
                       model_transition=transition_model,
                       model_emissions=emission_models)
        hmm.set_inputs(covariates_initial=[],
                       covariates_transition=[],
                       covariates_emissions=[[], ['Pacc']])
        hmm.set_outputs([['rt'], ['corr']])
        hmm.set_data([self.data_speed])
        hmm.train()

        # emission coefficients
        for idx, expected_coef in enumerate((5.5, 6.4)):
            np.testing.assert_array_almost_equal(
                hmm.model_emissions[idx][0].coef,
                np.array([[expected_coef]]),
                decimal=1)

        # emission dispersion
        for idx, expected_dispersion in enumerate((0.036, 0.063)):
            np.testing.assert_array_almost_equal(
                hmm.model_emissions[idx][0].dispersion,
                np.array([[expected_dispersion]]),
                decimal=2)

        # transition: exponentiate the predicted log-probabilities and sum
        # them over axis 0 before comparing with the reference totals.
        expected_totals = (np.array([387, 51]), np.array([37, 401.]))
        for idx, expected_total in enumerate(expected_totals):
            log_proba = hmm.model_transition[idx].predict_log_proba(
                hmm.inp_transitions_all_sequences)
            np.testing.assert_array_almost_equal(
                np.exp(log_proba).sum(axis=0),
                expected_total,
                decimal=0)

        # to_json
        json_dict = hmm.to_json('tests/IOHMM_models/UnSupervisedIOHMM/')
        self.assertEqual(json_dict['data_type'], 'UnSupervisedIOHMM')
        expected_keys = {
            'num_states', 'EM_tol', 'max_EM_iter', 'covariates_initial',
            'covariates_transition', 'covariates_emissions',
            'responses_emissions', 'model_initial', 'model_transition',
            'model_emissions',
        }
        self.assertSetEqual(set(json_dict['properties'].keys()),
                            expected_keys)
        with open('tests/IOHMM_models/UnSupervisedIOHMM/model.json',
                  'w') as outfile:
            json.dump(json_dict, outfile, indent=4, sort_keys=True)

コード例 #21

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

class PerfectCorrelationMultivariateOLSTests(unittest.TestCase):
    """OLS tests on two identical (perfectly correlated) response columns.

    ``Y`` is the Longley ``endog`` column stacked next to itself, so the
    reference values for both targets are duplicates of each other. Several
    tests assert that the log-likelihood methods raise ``ValueError`` on
    this data.
    """

    @classmethod
    def setUpClass(cls):
        """Load the Longley data and build the duplicated two-column ``Y``."""
        np.random.seed(0)
        cls.data_longley = sm.datasets.longley.load()
        cls.X = cls.data_longley.exog
        cls.Y = np.hstack((cls.data_longley.endog.reshape(-1, 1),
                           cls.data_longley.endog.reshape(-1, 1)))

    def test_ols(self):
        """Fit unregularized OLS and compare with the Longley references."""
        self.model = OLS(solver='auto',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y)
        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 7))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([
                [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                 -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                 1829.15146461355],
                [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                 -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                 1829.15146461355],
            ]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 7))
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr,
                    np.sqrt(old_div(9., self.data_longley.exog.shape[0]))),
            np.array([
                [890420.383607373, 84.9149257747669, 0.03349,
                 0.488399681651699, 0.214274163161675, 0.226073200069370,
                 455.478499142212],
                [890420.383607373, 84.9149257747669, 0.03349,
                 0.488399681651699, 0.214274163161675, 0.226073200069370,
                 455.478499142212],
            ]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion,
                    (old_div(9., self.data_longley.exog.shape[0]))),
            np.array([[92936.0061673238, 92936.0061673238],
                      [92936.0061673238, 92936.0061673238]]),
            decimal=3)
        # predict
        res = np.array([
            267.34003, -94.01394, 46.28717, -410.11462, 309.71459,
            -249.31122, -164.04896, -13.18036, 14.30477, 455.39409,
            -17.26893, -39.05504, -155.54997, -85.67131, 341.93151,
            -206.75783
        ]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((res, res)),
            decimal=3)
        # loglike/_per_sample
        self.assertRaises(ValueError, self.model.loglike_per_sample, self.X,
                          self.Y)

    def test_ols_l1_regularized(self):
        """Placeholder: l1 regularization is intentionally not tested."""
        # sklearn elastic net and l1 does not take sample_weights, will not test
        pass

    def test_ols_l2_regularized(self):
        """Fit l2-regularized OLS with uniform sample weight 0.5."""
        # there is a bug in sklearn with weights, it can only use list right now
        self.model = OLS(solver='auto',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method='l2',
                         alpha=0.1,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([
                [-2.0172203, -52.14364269, 0.07089677, -0.42552125,
                 -0.57305292, -0.41272483, 48.32484052],
                [-2.0172203, -52.14364269, 0.07089677, -0.42552125,
                 -0.57305292, -0.41272483, 48.32484052],
            ]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient: expected to be absent for this fit
        self.assertIsNone(self.model.stderr)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion,
                    (old_div(9., self.data_longley.exog.shape[0]))),
            np.array([[250870.081, 250870.081], [250870.081, 250870.081]]),
            decimal=3)
        # predict
        res = np.array([[280.31871146], [-131.6981265], [90.64414685],
                        [-400.10244445], [-440.59604167], [-543.88595187],
                        [200.70483416], [215.88629903], [74.9456573],
                        [913.85128645], [424.15996133], [-9.5797488],
                        [-360.96841852], [27.214226], [150.87705909],
                        [-492.17489392]])
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((res, res)),
            decimal=3)

        # loglike/_per_sample
        self.assertRaises(ValueError, self.model.loglike, self.X, self.Y)

    def test_ols_elastic_net_regularized(self):
        """Placeholder: elastic net is intentionally not tested."""
        # sklearn elastic net and l1 does not take sample_weights, will not test
        pass

    def test_ols_sample_weight_all_half(self):
        """Fit with every sample weighted 0.5 and compare with references."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array(
                ((-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                  -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                  1829.15146461355),
                 (-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                  -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                  1829.15146461355))).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr,
                    np.sqrt(old_div(9., self.data_longley.exog.shape[0]))),
            np.array(
                ((890420.383607373, 84.9149257747669, 0.334910077722432E-01,
                  0.488399681651699, 0.214274163161675, 0.226073200069370,
                  455.478499142212),
                 (890420.383607373, 84.9149257747669, 0.334910077722432E-01,
                  0.488399681651699, 0.214274163161675, 0.226073200069370,
                  455.478499142212))).reshape(2, -1),
            decimal=1)
        # scale
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion,
                    (old_div(9., self.data_longley.exog.shape[0]))),
            np.array(((92936.0061673238, 92936.0061673238),
                      (92936.0061673238, 92936.0061673238))),
            decimal=3)
        # predict
        res = np.array(
            (267.34003, -94.01394, 46.28717, -410.11462, 309.71459, -249.31122,
             -164.04896, -13.18036, 14.30477, 455.39409, -17.26893, -39.05504,
             -155.54997, -85.67131, 341.93151, -206.75783)).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((res, res)),
            decimal=3)
        # loglike/_per_sample
        self.assertRaises(ValueError, self.model.loglike, self.X, self.Y)

    def test_ols_sample_weight_all_zero(self):
        """Fitting with a sample weight of 0 must raise ``ValueError``."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0)

    def test_ols_sample_weight_half_zero_half_one(self):
        """Zero-weighted rows must match fitting on the weighted rows only."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        len_half = 8
        # Weight 1 for the first len_half rows, weight 0 for the remainder.
        self.model.fit(self.X,
                       self.Y,
                       sample_weight=np.array(
                           [1] * len_half + [0] *
                           (self.data_longley.exog.shape[0] - len_half)))
        self.model_half = OLS(solver='pinv',
                              fit_intercept=True,
                              est_stderr=True,
                              reg_method=None,
                              alpha=0,
                              l1_ratio=0,
                              max_iter=100,
                              coef=None,
                              stderr=None,
                              dispersion=None)
        self.model_half.fit(self.X[:len_half], self.Y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)
        # std.err
        np.testing.assert_array_almost_equal(self.model.stderr,
                                             self.model_half.stderr,
                                             decimal=3)

        # scale
        np.testing.assert_array_almost_equal(self.model.dispersion,
                                             self.model_half.dispersion,
                                             decimal=3)

    # corner cases
    def test_ols_one_data_point(self):
        """Fit on a single weighted observation and check the degenerate fit.

        The assertions expect an all-zero dispersion, a per-sample
        log-likelihood of 0 for the training response and ``-inf`` for any
        response row that differs from it.
        """
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (2, 7))
        # scale
        np.testing.assert_array_almost_equal(self.model.dispersion,
                                             np.array([[0, 0], [0, 0]]),
                                             decimal=6)
        # loglike_per_sample
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(self.X[0:1, :], self.Y[0:1, ]),
            np.array([0]))
        # np.inf is used because the np.Infinity alias was removed in
        # NumPy 2.0.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                np.array(self.X[0:1, :].tolist() * 6),
                np.array([[60323, 60323], [0, 60323], [60323, 60323],
                          [60322, 60323], [60322, 60322], [60323, 60323]])),
            np.array([0, -np.inf, 0, -np.inf, -np.inf, 0]),
            decimal=3)

    def test_ols_multicolinearty(self):
        """Compare a fit on a duplicated column with a single-column fit."""
        self.model_col = OLS(solver='pinv',
                             fit_intercept=False,
                             est_stderr=True,
                             reg_method=None,
                             alpha=0,
                             l1_ratio=0,
                             tol=1e-4,
                             max_iter=100,
                             coef=None,
                             stderr=None,
                             dispersion=None)
        # Design matrix made of the first column repeated twice.
        X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
        self.model_col.fit(X, self.Y, sample_weight=0.8)
        self.model = OLS(solver='pinv',
                         fit_intercept=False,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.8)
        # coef
        np.testing.assert_array_almost_equal(
            self.model_col.coef,
            np.array([[319.47969664, 319.47969664],
                      [319.47969664, 319.47969664]]).reshape(2, -1),
            decimal=3)
        # stderr: assertIsNone fails cleanly even if stderr is an array
        self.assertIsNone(self.model_col.stderr)
        # scale
        np.testing.assert_array_almost_equal(self.model_col.dispersion,
                                             self.model.dispersion,
                                             decimal=3)
        # loglike_per_sample
        self.assertRaises(ValueError, self.model_col.loglike, X, self.Y)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(self.X[:, 0:1]),
            decimal=3)

コード例 #22

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: vishalbelsare/IOHMM

class IndependentMultivariateOLSTests(unittest.TestCase):
    """OLS tests on seeded random data with two response columns.

    ``X`` is a (1000, 1) and ``Y`` a (1000, 2) array, both drawn with
    ``np.random.normal`` after seeding NumPy with 0.
    """

    @classmethod
    def setUpClass(cls):
        """Seed NumPy and draw the shared random ``X`` and ``Y`` arrays."""
        np.random.seed(0)
        cls.X = np.random.normal(size=(1000, 1))
        cls.Y = np.random.normal(size=(cls.X.shape[0], 2))

    def test_ols(self):
        """Fit unregularized OLS, check references and a JSON round trip."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y)
        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.02924966, -0.03484827],
                      [-0.00978688, 0.00336316]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient
        self.assertEqual(self.model.stderr.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            np.array([[0.03083908, 0.03121143],
                      [0.03002101, 0.03038348]]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(self.model.loglike(self.X, self.Y),
                               -2758.54387369,
                               places=3)

        # to_json
        json_dict = self.model.to_json(
            './tests/linear_models/OLS/MultivariateOLS/')
        self.assertEqual(json_dict['properties']['solver'], 'pinv')

        # from_json
        self.model_from_json = OLS.from_json(json_dict)
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_from_json.coef,
                                             decimal=3)
        np.testing.assert_array_almost_equal(self.model.stderr,
                                             self.model_from_json.stderr,
                                             decimal=3)
        np.testing.assert_array_almost_equal(self.model.dispersion,
                                             self.model_from_json.dispersion,
                                             decimal=3)

    def test_ols_l2_regularized(self):
        """Fit l2-regularized OLS and compare with reference values."""
        self.model = OLS(solver='auto',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method='l2',
                         alpha=0.1,
                         l1_ratio=1,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y)
        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.0292465, -0.03484456],
                      [-0.00978591, 0.00336286]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient: expected to be absent for this fit
        self.assertIsNone(self.model.stderr)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(self.model.loglike(self.X, self.Y),
                               -2758.5438737,
                               places=3)

    def test_ols_l1_regularized(self):
        """Placeholder: l1 regularization is intentionally not tested."""
        # sklearn l1 and elastic net does not support sample weight
        pass

    def test_ols_sample_weight_all_half(self):
        """Fit with every sample weighted 0.5 and compare with references.

        The weighted log-likelihood is compared against half of
        -2758.54387369.
        """
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)
        # coefficient
        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.02924966, -0.03484827],
                      [-0.00978688, 0.00336316]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient
        self.assertEqual(self.model.stderr.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            np.array([[0.03083908, 0.03121143],
                      [0.03002101, 0.03038348]]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(self.model.loglike(self.X, self.Y, 0.5),
                               old_div(-2758.54387369, 2.),
                               places=3)

        self.assertEqual(
            self.model.loglike_per_sample(self.X, self.Y).shape, (1000, ))

    def test_ols_sample_weight_all_zero(self):
        """Fitting with a sample weight of 0 must raise ``ValueError``."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0)

    def test_ols_sample_weight_half_zero_half_one(self):
        """Zero-weighted rows must match fitting on the weighted rows only."""
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        len_half = 500
        # Weight 1 for the first len_half rows, weight 0 for the remainder.
        self.model.fit(self.X,
                       self.Y,
                       sample_weight=np.array([1] * len_half + [0] *
                                              (self.X.shape[0] - len_half)))
        self.model_half = OLS(solver='pinv',
                              fit_intercept=True,
                              est_stderr=True,
                              reg_method=None,
                              alpha=0,
                              l1_ratio=0,
                              max_iter=100,
                              coef=None,
                              stderr=None,
                              dispersion=None)
        self.model_half.fit(self.X[:len_half], self.Y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)
        # std.err
        np.testing.assert_array_almost_equal(self.model.stderr,
                                             self.model_half.stderr,
                                             decimal=3)

        # scale
        np.testing.assert_array_almost_equal(self.model.dispersion,
                                             self.model_half.dispersion,
                                             decimal=3)

    # corner cases
    def test_ols_one_data_point(self):
        """Fit on a single weighted observation and check the degenerate fit.

        The assertions expect an all-zero dispersion, a per-sample
        log-likelihood of 0 for the training response and ``-inf`` for a
        different response row.
        """
        self.model = OLS(solver='pinv',
                         fit_intercept=True,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X[0:1, :], self.Y[0:1, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (2, 2))
        # scale
        np.testing.assert_array_almost_equal(self.model.dispersion,
                                             np.array([[0, 0], [0, 0]]),
                                             decimal=6)
        # loglike_per_sample
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(self.X[0:1, :], self.Y[0:1, ]),
            np.array([0]))

        # Score the training response (row 0) and a different response
        # (row 1) against six copies of the training input.
        # np.inf is used because the np.Infinity alias was removed in
        # NumPy 2.0.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                np.array(self.X[0:1, :].tolist() * 6),
                np.array([
                    self.Y[0, ], self.Y[1, ], self.Y[0, ], self.Y[1, ],
                    self.Y[1, ], self.Y[0, ]
                ])),
            np.array([0, -np.inf, 0, -np.inf, -np.inf, 0]),
            decimal=3)

    def test_ols_multicolinearty(self):
        """Compare a fit on a duplicated column with a single-column fit."""
        self.model_col = OLS(solver='pinv',
                             fit_intercept=False,
                             est_stderr=True,
                             reg_method=None,
                             alpha=0,
                             l1_ratio=0,
                             tol=1e-4,
                             max_iter=100,
                             coef=None,
                             stderr=None,
                             dispersion=None)
        # Design matrix made of the first column repeated twice.
        X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
        self.model_col.fit(X, self.Y, sample_weight=0.5)
        self.model = OLS(solver='pinv',
                         fit_intercept=False,
                         est_stderr=True,
                         reg_method=None,
                         alpha=0,
                         l1_ratio=0,
                         tol=1e-4,
                         max_iter=100,
                         coef=None,
                         stderr=None,
                         dispersion=None)
        self.model.fit(self.X[:, 0:1], self.Y, sample_weight=0.5)
        # stderr: assertIsNone fails cleanly even if stderr is an array
        self.assertIsNone(self.model_col.stderr)
        # scale
        np.testing.assert_array_almost_equal(self.model_col.dispersion,
                                             self.model.dispersion,
                                             decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, self.Y),
            self.model.loglike_per_sample(self.X[:, 0:1], self.Y),
            decimal=0)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(self.X[:, 0:1]),
            decimal=1)

コード例 #23

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

class IndependentMultivariateOLSTests(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        np.random.seed(0)
        cls.X = np.random.normal(size=(1000, 1))
        cls.Y = np.random.normal(size=(cls.X.shape[0], 2))

    def test_ols(self):
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X, self.Y)
        # coefficient
        print(self.model.coef)
        print(self.model.dispersion)
        print(self.model.stderr)
        print(self.Y - self.model.predict(self.X))
        print(self.model.loglike(self.X, self.Y))

        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.02924966, -0.03484827],
                      [-0.00978688, 0.00336316]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            np.array([[0.03083908, 0.03121143],
                      [0.03002101, 0.03038348]]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.X, self.Y),
            -2758.54387369,
            places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/OLS/MultivariateOLS/')
        self.assertEqual(json_dict['properties']['solver'], 'pinv')

        # from_json
        self.model_from_json = OLS.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_from_json.coef,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            self.model_from_json.stderr,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            self.model_from_json.dispersion,
            decimal=3)

    def test_ols_l2_regularized(self):
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0.1, l1_ratio=1,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X, self.Y)
        # coefficient
        print(self.model.coef)
        print(self.model.dispersion)
        print(self.model.loglike(self.X, self.Y))

        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.0292465, -0.03484456],
                      [-0.00978591, 0.00336286]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertTrue(self.model.stderr is None)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.X, self.Y),
            -2758.5438737,
            places=3)

    def test_ols_l1_regularized(self):
        # sklearn l1 and elstic net does not support sample weight
        pass

    def test_ols_sample_weight_all_half(self):
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)
        # coefficient
        print(self.model.coef)
        print(self.model.dispersion)
        print(self.model.stderr)
        print(self.model.loglike(self.X, self.Y, sample_weight=0.5))

        self.assertEqual(self.model.coef.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-0.02924966, -0.03484827],
                      [-0.00978688, 0.00336316]]).reshape(2, -1),
            decimal=3)
        # std.err of coefficient (calibrated by df_resid)
        self.assertEqual(self.model.stderr.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            np.array([[0.03083908, 0.03121143],
                      [0.03002101, 0.03038348]]).reshape(2, -1),
            decimal=2)
        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            np.array([[0.94905363, 0.0164185],
                      [0.0164185, 0.89937019]]),
            decimal=3)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.X, self.Y, 0.5),
            old_div(-2758.54387369, 2.),
            places=3)

        self.assertEqual(
            self.model.loglike_per_sample(self.X, self.Y).shape,
            (1000, ))

    def test_ols_sample_weight_all_zero(self):
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.assertRaises(ValueError, self.model.fit, self.X, self.Y, 0)

    def test_ols_sample_weight_half_zero_half_one(self):
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        len_half = 500
        self.model.fit(self.X, self.Y,
                       sample_weight=np.array([1] * len_half +
                                              [0] * (self.X.shape[0] - len_half)))
        self.model_half = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model_half.fit(self.X[:len_half], self.Y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_half.coef,
            decimal=3)
        # std.err
        np.testing.assert_array_almost_equal(
            self.model.stderr,
            self.model_half.stderr,
            decimal=3)

        # scale
        np.testing.assert_array_almost_equal(
            self.model.dispersion,
            self.model_half.dispersion,
            decimal=3)

    # corner cases
    def test_ols_one_data_point(self):
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X[0:1, :],
                       self.Y[0:1, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (2, 2))
        # scale
        np.testing.assert_array_almost_equal(
            self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6)
        # loglike_per_sample
        np.testing.assert_array_equal(self.model.loglike_per_sample(
            self.X[0:1, :], self.Y[0:1, ]), np.array([0]))

        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            np.array(self.X[0:1, :].tolist() * 6),
            np.array([self.Y[0, ], self.Y[1, ], self.Y[0, ],
                      self.Y[1, ], self.Y[1, ], self.Y[0, ]])),
            np.array([0, -np.Infinity, 0, -np.Infinity, -np.Infinity, 0]), decimal=3)

    def test_ols_multicolinearty(self):
        self.model_col = OLS(
            solver='pinv', fit_intercept=False, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
        self.model_col.fit(X,
                           self.Y, sample_weight=0.5)
        self.model = OLS(
            solver='pinv', fit_intercept=False, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X[:, 0:1],
                       self.Y, sample_weight=0.5)
        # stderr
        self.assertEqual(self.model_col.stderr, None)
        # scale
        np.testing.assert_array_almost_equal(
            self.model_col.dispersion, self.model.dispersion, decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, self.Y),
            self.model.loglike_per_sample(self.X[:, 0:1],
                                          self.Y), decimal=0)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(self.X[:, 0:1]), decimal=1)

コード例 #24

0

ファイルを表示

ファイル: test_SemiSupervisedIOHMM.py プロジェクト: vishalbelsare/IOHMM

    def test_train_no_covariates(self):
        """Train a 4-state SemiSupervisedIOHMM without covariates.

        After training, the per-state emission coefficients and dispersions
        and the per-state transition probabilities are compared against
        hard-coded expectations, and the model is serialized to JSON.
        """
        np.random.seed(0)
        self.model = SemiSupervisedIOHMM(num_states=4,
                                         max_EM_iter=100,
                                         EM_tol=1e-10)
        initial_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        transition_model = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        self.model.set_models(model_initial=initial_model,
                              model_transition=transition_model,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=[],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([[self.data_speed, self.states]])
        self.model.train()

        # emission coefficients, one expected value per hidden state
        for state, expected_coef in enumerate([0, 1, 6.4, 5.5]):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected_coef]]),
                decimal=1)

        # emission dispersion, one expected value per hidden state
        for state, expected_disp in enumerate([0, 0, 0.051, 0.032]):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected_disp]]),
                decimal=2)

        # transition: (expected probability row, comparison precision)
        expected_transitions = [
            ([[0.4, 0.6, 0, 0]], 1),
            ([[0.19, 0.81, 0, 0]], 2),
            ([[0, 0, 0.93, 0.07]], 2),
            ([[0, 0, 0.11, 0.89]], 2),
        ]
        for state, (probs, precision) in enumerate(expected_transitions):
            log_proba = self.model.model_transition[state].predict_log_proba(
                np.array([[]]))
            np.testing.assert_array_almost_equal(
                np.exp(log_proba), np.array(probs), decimal=precision)

        # to_json
        json_dict = self.model.to_json(
            'tests/IOHMM_models/SemiSupervisedIOHMM/')
        self.assertEqual(json_dict['data_type'], 'SemiSupervisedIOHMM')
        expected_keys = {
            'num_states', 'EM_tol', 'max_EM_iter', 'covariates_initial',
            'covariates_transition', 'covariates_emissions',
            'responses_emissions', 'model_initial', 'model_transition',
            'model_emissions',
        }
        self.assertSetEqual(set(json_dict['properties'].keys()), expected_keys)
        with open('tests/IOHMM_models/SemiSupervisedIOHMM/model.json',
                  'w') as outfile:
            json.dump(json_dict, outfile, indent=4, sort_keys=True)

コード例 #25

0

ファイルを表示

ファイル: test_OLS.py プロジェクト: codealphago/IOHMM

class PerfectCorrelationMultivariateOLSTests(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        """Load the Longley data and duplicate its response into two columns.

        Both columns of ``cls.Y`` are the same endog vector, so the two
        responses are perfectly correlated.
        """
        np.random.seed(0)
        longley = sm.datasets.longley.load()
        cls.data_longley = longley
        cls.X = longley.exog
        endog_column = longley.endog.reshape(-1, 1)
        cls.Y = np.hstack((endog_column, longley.endog.reshape(-1, 1)))

    def test_ols(self):
        """Check the unregularized fit when both responses are identical.

        Each expected row is listed once and repeated for the second
        response. Standard errors and dispersion are rescaled by
        9 / n_observations before comparison, and the per-sample
        log-likelihood is expected to raise ValueError.
        """
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model.fit(self.X, self.Y)

        # coefficient
        coef_row = [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                    -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                    1829.15146461355]
        self.assertEqual(self.model.coef.shape, (2, 7))
        np.testing.assert_array_almost_equal(
            self.model.coef, np.array([coef_row, coef_row]), decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        stderr_row = [890420.383607373, 84.9149257747669, 0.03349,
                      0.488399681651699, 0.214274163161675, 0.226073200069370,
                      455.478499142212]
        self.assertEqual(self.model.stderr.shape, (2, 7))
        stderr_scale = np.sqrt(old_div(9., self.data_longley.exog.shape[0]))
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr, stderr_scale),
            np.array([stderr_row, stderr_row]),
            decimal=2)

        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        dispersion_scale = old_div(9., self.data_longley.exog.shape[0])
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, dispersion_scale),
            np.full((2, 2), 92936.0061673238),
            decimal=3)

        # predict: residuals are the same for both response columns
        residual_column = np.array([
            267.34003, -94.01394, 46.28717, -410.11462,
            309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409,
            -17.26893, -39.05504, -155.54997, -85.67131, 341.93151,
            -206.75783]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((residual_column, residual_column)),
            decimal=3)

        # loglike/_per_sample
        with self.assertRaises(ValueError):
            self.model.loglike_per_sample(self.X, self.Y)

    def test_ols_l1_regularized(self):
        # sklearn elastic net and l1 does not take sample_weights, will not test
        pass

    def test_ols_l2_regularized(self):
        """Check the l2-regularized fit with a scalar sample weight of 0.5.

        No standard errors are expected, the dispersion is rescaled by
        9 / n_observations before comparison, and ``loglike`` is expected to
        raise ValueError.
        """
        # there is a bug in sklearn with weights, it can only use list right now
        self.model = OLS(
            solver='auto', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0.1, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)

        # coefficient: the same row is expected for both responses
        coef_row = [-2.0172203, -52.14364269, 0.07089677, -0.42552125,
                    -0.57305292, -0.41272483, 48.32484052]
        np.testing.assert_array_almost_equal(
            self.model.coef, np.array([coef_row, coef_row]), decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        self.assertIsNone(self.model.stderr)

        # scale
        self.assertEqual(self.model.dispersion.shape, (2, 2))
        dispersion_scale = old_div(9., self.data_longley.exog.shape[0])
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, dispersion_scale),
            np.full((2, 2), 250870.081),
            decimal=3)

        # predict
        res = np.array([
            280.31871146, -131.6981265, 90.64414685, -400.10244445,
            -440.59604167, -543.88595187, 200.70483416, 215.88629903,
            74.9456573, 913.85128645, 424.15996133, -9.5797488,
            -360.96841852, 27.214226, 150.87705909,
            -492.17489392]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((res, res)),
            decimal=3)

        # loglike/_per_sample
        with self.assertRaises(ValueError):
            self.model.loglike(self.X, self.Y)

    def test_ols_elastic_net_regularized(self):
        # sklearn elastic net and l1 does not take sample_weights, will not test
        pass

    def test_ols_sample_weight_all_half(self):
        """Fit with a scalar sample weight of 0.5 on identical responses.

        Standard errors and dispersion are rescaled by 9 / n_observations
        before comparison, and ``loglike`` is expected to raise ValueError.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model.fit(self.X, self.Y, sample_weight=0.5)

        # coefficient: the same row is expected for both responses
        coef_row = [-3482258.63459582, 15.0618722713733, -0.358191792925910E-01,
                    -2.02022980381683, -1.03322686717359, -0.511041056535807E-01,
                    1829.15146461355]
        np.testing.assert_array_almost_equal(
            self.model.coef, np.array([coef_row, coef_row]), decimal=3)

        # std.err of coefficient (calibrated by df_resid)
        stderr_row = [890420.383607373, 84.9149257747669, 0.334910077722432E-01,
                      0.488399681651699, 0.214274163161675, 0.226073200069370,
                      455.478499142212]
        stderr_scale = np.sqrt(old_div(9., self.data_longley.exog.shape[0]))
        np.testing.assert_array_almost_equal(
            old_div(self.model.stderr, stderr_scale),
            np.array([stderr_row, stderr_row]),
            decimal=1)

        # scale
        dispersion_scale = old_div(9., self.data_longley.exog.shape[0])
        np.testing.assert_array_almost_equal(
            old_div(self.model.dispersion, dispersion_scale),
            np.full((2, 2), 92936.0061673238),
            decimal=3)

        # predict
        res = np.array([
            267.34003, -94.01394, 46.28717, -410.11462,
            309.71459, -249.31122, -164.04896, -13.18036, 14.30477, 455.39409,
            -17.26893, -39.05504, -155.54997, -85.67131, 341.93151,
            -206.75783]).reshape(-1, 1)
        np.testing.assert_array_almost_equal(
            self.Y - self.model.predict(self.X),
            np.hstack((res, res)),
            decimal=3)

        # loglike/_per_sample
        with self.assertRaises(ValueError):
            self.model.loglike(self.X, self.Y)

    def test_ols_sample_weight_all_zero(self):
        """Fitting with a sample weight of 0 must raise ValueError."""
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        with self.assertRaises(ValueError):
            self.model.fit(self.X, self.Y, 0)

    def test_ols_sample_weight_half_zero_half_one(self):
        """Weights of 1/0 must match a fit on only the weight-1 rows.

        The first 8 rows get weight 1 and the rest weight 0; the result is
        compared with a second model fitted on just those first 8 rows.
        """
        len_half = 8
        n_rows = self.data_longley.exog.shape[0]
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        weights = np.array([1] * len_half + [0] * (n_rows - len_half))
        self.model.fit(self.X, self.Y, sample_weight=weights)

        # Reference model; note it is built without an explicit ``tol``.
        self.model_half = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None, alpha=0, l1_ratio=0, max_iter=100,
            coef=None, stderr=None, dispersion=None)
        self.model_half.fit(self.X[:len_half], self.Y[:len_half])

        # coefficient, std.err and scale must all agree
        for attribute in ('coef', 'stderr', 'dispersion'):
            np.testing.assert_array_almost_equal(
                getattr(self.model, attribute),
                getattr(self.model_half, attribute),
                decimal=3)

    # corner cases
    def test_ols_one_data_point(self):
        """Fit on a single row and check the degenerate dispersion/loglike.

        The dispersion is expected to be all zeros. For six copies of the
        fitted regressor row, the per-sample log-likelihood is expected to be
        0 where the response is [60323, 60323] and -inf for every other
        response listed below.
        """
        self.model = OLS(
            solver='pinv', fit_intercept=True, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X[0:1, :],
                       self.Y[0:1, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (2, 7))
        # scale
        np.testing.assert_array_almost_equal(
            self.model.dispersion, np.array([[0, 0], [0, 0]]), decimal=6)
        # loglike_per_sample
        np.testing.assert_array_equal(self.model.loglike_per_sample(
            self.X[0:1, :], self.Y[0:1, ]), np.array([0]))
        # np.inf is used because the np.Infinity alias no longer exists in
        # NumPy 2.0 and later.
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            np.array(self.X[0:1, :].tolist() * 6),
            np.array([[60323, 60323], [0, 60323], [60323, 60323],
                      [60322, 60323], [60322, 60322], [60323, 60323]])),
            np.array([0, -np.inf, 0, -np.inf, -np.inf, 0]), decimal=3)

    def test_ols_multicolinearty(self):
        """Compare a fit on a duplicated regressor with the single-column fit.

        The first column of ``self.X`` is stacked next to itself to build a
        perfectly collinear design. The collinear model is expected to report
        no standard errors, to match the single-column model in dispersion
        and predictions, and to raise ValueError from ``loglike``.
        """
        self.model_col = OLS(
            solver='pinv', fit_intercept=False, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        X = np.hstack([self.X[:, 0:1], self.X[:, 0:1]])
        self.model_col.fit(X,
                           self.Y, sample_weight=0.8)
        self.model = OLS(
            solver='pinv', fit_intercept=False, est_stderr=True,
            reg_method=None,  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None,  dispersion=None)
        self.model.fit(self.X[:, 0:1],
                       self.Y, sample_weight=0.8)
        # coef
        np.testing.assert_array_almost_equal(
            self.model_col.coef, np.array([[319.47969664, 319.47969664],
                                           [319.47969664, 319.47969664]]).reshape(2, -1), decimal=3)
        # stderr: identity check, so an unexpected array value fails cleanly
        # instead of raising on an ambiguous `==` comparison.
        self.assertIsNone(self.model_col.stderr)
        # scale
        np.testing.assert_array_almost_equal(
            self.model_col.dispersion, self.model.dispersion, decimal=3)
        # loglike
        self.assertRaises(ValueError,
                          self.model_col.loglike, X, self.Y)
        # predict
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(self.X[:, 0:1]), decimal=3)