Esempio n. 1
0
 def test_lr_sample_weight_all_zero(self):
     """Expect ``fit`` to raise ``ValueError`` when its third argument is 0.

     The third positional argument is presumably the sample weight (per the
     test name); a weight of zero for every row leaves nothing to fit.
     """
     settings = dict(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model = DiscreteMNL(**settings)
     # Resolve the callable and its inputs up front so that only the call
     # itself is guarded by the assertion.
     fit = self.model.fit
     exog, endog = self.data_anes96.exog, self.data_anes96.endog
     with self.assertRaises(ValueError):
         fit(exog, endog, 0)
Esempio n. 2
0
    def test_lr(self):
        """Fit an unregularized newton-cg model on anes96 and check its outputs.

        Checks the number of correctly predicted rows, the total
        log-likelihood, the JSON round trip of the coefficients and the
        fitted class labels.
        """
        settings = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model = DiscreteMNL(**settings)
        exog, endog = self.data_anes96.exog, self.data_anes96.endog
        self.model.fit(exog, endog)

        # predict: count rows whose predicted label equals the observed one
        n_correct = np.sum(self.model.predict(exog) == endog)
        self.assertEqual(n_correct, 372)

        # loglike
        total_loglike = self.model.loglike(exog, endog)
        self.assertAlmostEqual(total_loglike, -1461.9227472481984, places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/DiscreteMNL/Multinomial/')
        saved_solver = json_dict['properties']['solver']
        self.assertEqual(saved_solver, 'newton-cg')

        # from_json: the reloaded model must carry the same coefficients
        self.model_from_json = DiscreteMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)

        # fitted classes are expected to be the labels 0..6
        expected_classes = np.arange(7)
        np.testing.assert_array_almost_equal(
            self.model.classes, expected_classes, decimal=3)
        self.assertEqual(self.model.n_classes, 7)
Esempio n. 3
0
    def test_lr_multicolinearty(self):
        """Compare a fit on a duplicated column with a fit on the single column.

        The design matrix ``X`` stacks the first spector column twice. The
        model fitted on ``X`` must give the same per-sample log-likelihoods
        and predictions as the model fitted on the single column.
        """
        settings = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)

        # Model on the perfectly collinear two-column design.
        self.model_col = DiscreteMNL(**settings)
        first_col = self.data_spector.exog[:, 0:1]
        X = np.hstack([first_col, first_col])
        endog = self.data_spector.endog
        self.model_col.fit(X, endog, sample_weight=0.5)

        # Reference model on the single column.
        self.model = DiscreteMNL(**settings)
        self.model.fit(first_col, endog, sample_weight=0.5)

        expected_coef = np.array([[-9.703, 1.42002783, 1.42002783]])
        np.testing.assert_array_almost_equal(
            self.model_col.coef, expected_coef, decimal=3)

        # loglike_per_sample
        loglike_col = self.model_col.loglike_per_sample(X, endog)
        loglike_ref = self.model.loglike_per_sample(first_col, endog)
        np.testing.assert_array_almost_equal(loglike_col, loglike_ref, decimal=3)

        # predict
        predicted_col = self.model_col.predict(X)
        predicted_ref = self.model.predict(first_col)
        np.testing.assert_array_almost_equal(predicted_col, predicted_ref, decimal=3)
Esempio n. 4
0
 def test_lr_sample_weight_all_half(self):
     """Fit on ``self.y`` with a uniform 0.5 weight and check the trivial result.

     Asserts an all-zero ``(4, 1)`` coefficient array and a weighted
     log-likelihood of exactly 0.
     """
     settings = dict(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model = DiscreteMNL(**settings)
     exog, labels = self.data_spector.exog, self.y
     self.model.fit(exog, labels, sample_weight=.5)

     # coefficient
     expected_coef = np.zeros((4, 1))
     np.testing.assert_array_equal(self.model.coef, expected_coef)

     # loglike
     weighted_loglike = self.model.loglike(exog, labels, sample_weight=.5)
     self.assertEqual(weighted_loglike, 0)
Esempio n. 5
0
 def test_lr_regularized(self):
     """Fit an L2-regularized (alpha=10) newton-cg model on anes96.

     Checks the number of correctly predicted rows and the total
     log-likelihood against stored reference values.
     """
     settings = dict(
         solver='newton-cg', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=10, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model = DiscreteMNL(**settings)
     exog, endog = self.data_anes96.exog, self.data_anes96.endog
     self.model.fit(exog, endog)

     # predict: count rows whose predicted label equals the observed one
     n_correct = np.sum(self.model.predict(exog) == endog)
     self.assertEqual(n_correct, 333)

     # loglike
     total_loglike = self.model.loglike(exog, endog)
     self.assertAlmostEqual(total_loglike, -1540.888456277886, places=3)
Esempio n. 6
0
 def test_lr_sample_weight_all_half(self):
     """Fit on spector with a uniform 0.5 weight and check coef and loglike.

     Asserts the stored reference coefficients and that the weighted
     log-likelihood is half of the reference value ``-12.8896334653335``.
     """
     self.model = DiscreteMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     exog, endog = self.data_spector.exog, self.data_spector.endog
     self.model.fit(exog, endog, sample_weight=.5)
     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef,
         np.array([[-13.021, 2.8261, .09515, 2.378]]),
         decimal=3)
     # loglike: both operands are floats, so plain ``/`` is true division on
     # Python 2 and 3 alike and the ``old_div`` compatibility shim is not needed.
     self.assertAlmostEqual(
         self.model.loglike(exog, endog, sample_weight=.5),
         -12.8896334653335 / 2.,
         places=3)
Esempio n. 7
0
 def test_lr_sample_weight_half_zero_half_one(self):
     """Zero-weighting the tail must match fitting on the head only.

     One model is fitted on all anes96 rows with weight 1 for the first
     500 rows and weight 0 for the rest; a second model is fitted on just
     those first 500 rows. Their coefficients must agree to 3 decimals.
     """
     settings = dict(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model = DiscreteMNL(**settings)
     len_half = 500
     exog, endog = self.data_anes96.exog, self.data_anes96.endog
     n_tail = exog.shape[0] - len_half
     weights = np.array([1] * len_half + [0] * n_tail)
     self.model.fit(exog, endog, sample_weight=weights)

     # Reference model that only ever sees the weighted-in rows.
     self.model_half = DiscreteMNL(**settings)
     self.model_half.fit(self.data_anes96.exog[:len_half],
                         self.data_anes96.endog[:len_half])

     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef, self.model_half.coef, decimal=3)
Esempio n. 8
0
 def test_lr_three_data_point(self):
     """Fit a lightly regularized model on three anes96 rows (6..8).

     Checks the coefficient shape and the per-sample log-likelihoods for
     three different label vectors, including one whose last two labels
     are expected to score ``-inf``.
     """
     # with regularization
     self.model = DiscreteMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=.1, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     exog = self.data_anes96.exog[6:9, :]
     self.model.fit(exog, self.data_anes96.endog[6:9, ], sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (3, 6))
     # loglike_per_sample
     np.testing.assert_array_almost_equal(
         self.model.loglike_per_sample(exog, np.array([1, 4, 3])),
         np.array([-0.015, -0.089, -0.095]), decimal=3)
     np.testing.assert_array_almost_equal(
         self.model.loglike_per_sample(exog, np.array([3, 1, 4])),
         np.array([-4.2, -5.046, -2.827]), decimal=3)
     # ``np.inf`` replaces the ``np.Infinity`` alias, which NumPy 2.0 removed.
     np.testing.assert_array_almost_equal(
         self.model.loglike_per_sample(exog, np.array([3, 0, 5])),
         np.array([-4.2, -np.inf, -np.inf]), decimal=3)
Esempio n. 9
0
    def test_lr(self):
        """Fit on the label vector ``self.y`` and check the trivial model.

        Asserts all-zero ``(4, 1)`` coefficients, that every prediction is
        ``'foo'``, and that per-sample log-likelihood is 0 for ``'foo'`` and
        ``-inf`` for the label ``'bar'``.
        """
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y)
        # coefficient
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))

        # predict
        np.testing.assert_array_equal(
            self.model.predict(exog),
            np.array(['foo'] * self.data_spector.endog.shape[0]))
        # loglike_per_sample
        # ``np.inf`` replaces the ``np.Infinity`` alias, which NumPy 2.0 removed.
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(
                exog, np.array(['bar'] * 16 + ['foo'] * 16)),
            np.array([-np.inf] * 16 + [0] * 16))
Esempio n. 10
0
    def test_lr_one_data_point(self):
        """Fit on a single row (index 4) and score two rows against it.

        Asserts all-zero ``(4, 1)`` coefficients, a log-likelihood of 0 for
        the label ``'foo'`` and ``-inf`` for the label ``'bar'``.
        """
        # no regularization (alpha=0)
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_spector.exog[4:5, :],
                       self.y[4:5, ], sample_weight=0.5)
        # coef
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))
        # loglike_per_sample, scored on rows 4..5
        two_rows = self.data_spector.exog[4:6, :]
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(two_rows, np.array(['foo', 'foo'])),
            np.array([0, 0]), decimal=3)

        # ``np.inf`` replaces the ``np.Infinity`` alias, which NumPy 2.0 removed.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(two_rows, np.array(['foo', 'bar'])),
            np.array([0, -np.inf]), decimal=3)
Esempio n. 11
0
 def test_lr_sample_weight_all_half(self):
     """A uniform 0.5 weight must not change the fitted anes96 model.

     Fits one model with ``sample_weight=.5`` and one without, then checks
     that the coefficients agree, that the unweighted model predicts 372
     rows correctly, and that its 0.5-weighted log-likelihood is half of
     the reference value ``-1461.92274725``.
     """
     exog, endog = self.data_anes96.exog, self.data_anes96.endog
     self.model_half = DiscreteMNL(
         solver='newton-cg', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model_half.fit(exog, endog, sample_weight=.5)
     self.model = DiscreteMNL(
         solver='newton-cg', fit_intercept=True, est_stderr=True,
         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
         coef=None, stderr=None, classes=None)
     self.model.fit(exog, endog)
     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef, self.model_half.coef, decimal=3)
     # predict
     self.assertEqual(np.sum(self.model.predict(exog) == endog), 372)
     # loglike: both operands are floats, so plain ``/`` is true division on
     # Python 2 and 3 alike and the ``old_div`` compatibility shim is not needed.
     self.assertAlmostEqual(
         self.model.loglike(exog, endog, sample_weight=.5),
         -1461.92274725 / 2.,
         places=3)
Esempio n. 12
0
    def test_lr(self):
        """Fit an unregularized lbfgs model on spector and check its outputs.

        Covers the coefficients, the predicted labels, the total
        log-likelihood, the JSON round trip and the fitted class labels.
        """
        settings = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model = DiscreteMNL(**settings)
        exog, endog = self.data_spector.exog, self.data_spector.endog
        self.model.fit(exog, endog)

        # coefficient
        expected_coef = np.array([[-13.021, 2.8261, .09515, 2.378]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array(
            (0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
             0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
             1.,  0.,  1.,  1.,  1.,  0.))
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike
        total_loglike = self.model.loglike(exog, endog)
        self.assertAlmostEqual(total_loglike, -12.8896334653335, places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/DiscreteMNL/Binary/')
        saved_solver = json_dict['properties']['solver']
        self.assertEqual(saved_solver, 'lbfgs')

        # from_json: the reloaded model must carry the same coefficients
        self.model_from_json = DiscreteMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)

        # fitted classes
        expected_classes = np.array([0, 1])
        np.testing.assert_array_almost_equal(
            self.model.classes, expected_classes, decimal=3)
        self.assertEqual(self.model.n_classes, 2)
Esempio n. 13
0
    def test_lr_regularized(self):
        """Fit an L2-regularized (alpha=.01) lbfgs model on spector.

        Checks the coefficients, the predicted labels and the total
        log-likelihood against stored reference values.
        """
        settings = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=.01, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model = DiscreteMNL(**settings)
        exog, endog = self.data_spector.exog, self.data_spector.endog
        self.model.fit(exog, endog)

        # coefficient
        expected_coef = np.array([[-10.66,   2.364,   0.064,   2.142]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array(
            (0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
             0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
             1.,  0.,  1.,  1.,  1.,  0.))
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike
        total_loglike = self.model.loglike(exog, endog)
        self.assertAlmostEqual(total_loglike, -13.016861222748519, places=3)
Esempio n. 14
0
 def test_lr_two_data_point(self):
     """Fit on two spector rows under three scenarios and check the scores.

     Each scenario fits a fresh model with ``sample_weight=0.5``, checks
     the ``(1, 4)`` coefficient shape and compares the per-sample
     log-likelihood on the training rows with stored reference values.
     The model from the last scenario is then scored against a label
     vector containing ``2``, which is expected to yield ``-inf``.
     """
     # (alpha, training rows, expected per-sample log-likelihood)
     scenarios = (
         (.01, slice(4, 6), np.array([-0.226, -0.289])),  # with regularization
         (0, slice(4, 6), np.array([0, 0])),              # with no regularization
         (0, slice(3, 5), np.array([0, 0])),              # class in reverse
     )
     for alpha, rows, expected in scenarios:
         exog = self.data_spector.exog[rows, :]
         endog = self.data_spector.endog[rows]
         self.model = DiscreteMNL(
             solver='lbfgs', fit_intercept=True, est_stderr=True,
             reg_method='l2', alpha=alpha, l1_ratio=0, tol=1e-4, max_iter=100,
             coef=None, stderr=None, classes=None)
         self.model.fit(exog, endog, sample_weight=0.5)
         # coef
         self.assertEqual(self.model.coef.shape, (1, 4))
         # loglike_per_sample
         np.testing.assert_array_almost_equal(
             self.model.loglike_per_sample(exog, endog), expected, decimal=3)

     # ``self.model`` is now the rows-3..4 fit from the last scenario.
     # ``np.inf`` replaces the ``np.Infinity`` alias, which NumPy 2.0 removed.
     np.testing.assert_array_almost_equal(
         self.model.loglike_per_sample(
             self.data_spector.exog[3:5, :], np.array([0, 2])),
         np.array([0, -np.inf]), decimal=3)
    def test_train_multivariate(self):
        """Train a two-state ``UnSupervisedIOHMM`` with two emission models.

        The emissions are an ``OLS`` model for ``rt`` and a ``DiscreteMNL``
        model for ``corr`` with covariate ``Pacc``. After training, the test
        checks the first emission model's coefficients and dispersion in
        each state, the column sums of the transition probabilities, and
        the keys of the JSON export, which is also written to disk.
        """
        hmm = UnSupervisedIOHMM(num_states=2, max_EM_iter=100, EM_tol=1e-6)
        self.model = hmm
        hmm.set_models(
            model_initial=CrossEntropyMNL(solver='newton-cg', reg_method='l2'),
            model_transition=CrossEntropyMNL(solver='newton-cg',
                                             reg_method='l2'),
            model_emissions=[OLS(), DiscreteMNL(reg_method='l2')])
        hmm.set_inputs(covariates_initial=[],
                       covariates_transition=[],
                       covariates_emissions=[[], ['Pacc']])
        hmm.set_outputs([['rt'], ['corr']])
        hmm.set_data([self.data_speed])
        hmm.train()

        # emission coefficients, indexed by the first subscript of model_emissions
        for idx, expected_coef in enumerate((5.5, 6.4)):
            np.testing.assert_array_almost_equal(
                hmm.model_emissions[idx][0].coef,
                np.array([[expected_coef]]),
                decimal=1)

        # emission dispersion
        for idx, expected_dispersion in enumerate((0.036, 0.063)):
            np.testing.assert_array_almost_equal(
                hmm.model_emissions[idx][0].dispersion,
                np.array([[expected_dispersion]]),
                decimal=2)

        # transition: column sums of the predicted probabilities
        expected_sums = (np.array([387, 51]), np.array([37, 401.]))
        for idx, expected in enumerate(expected_sums):
            log_proba = hmm.model_transition[idx].predict_log_proba(
                hmm.inp_transitions_all_sequences)
            np.testing.assert_array_almost_equal(
                np.exp(log_proba).sum(axis=0), expected, decimal=0)

        # to_json
        json_dict = hmm.to_json('tests/IOHMM_models/UnSupervisedIOHMM/')
        self.assertEqual(json_dict['data_type'], 'UnSupervisedIOHMM')
        expected_keys = {
            'num_states', 'EM_tol', 'max_EM_iter', 'covariates_initial',
            'covariates_transition', 'covariates_emissions',
            'responses_emissions', 'model_initial', 'model_transition',
            'model_emissions',
        }
        self.assertSetEqual(set(json_dict['properties'].keys()), expected_keys)

        # Persist the exported dictionary next to the files written by to_json.
        with open('tests/IOHMM_models/UnSupervisedIOHMM/model.json',
                  'w') as outfile:
            json.dump(json_dict, outfile, indent=4, sort_keys=True)
Esempio n. 16
0
class DiscreteMNLBinaryTests(unittest.TestCase):
    """Tests for ``DiscreteMNL`` fitted on the two-class spector dataset."""

    # Reference predictions on the full dataset; the unregularized and the
    # alpha=.01 fits are both expected to produce these labels.
    _EXPECTED_PREDICTIONS = (
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
        1.,  0.,  1.,  1.,  1.,  0.)

    @classmethod
    def setUpClass(cls):
        cls.data_spector = sm.datasets.spector.load()

    @staticmethod
    def _make_model(alpha=0):
        """Return an unfitted lbfgs/L2 ``DiscreteMNL`` with the given ``alpha``."""
        return DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=alpha, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)

    def test_lr(self):
        """Unregularized fit: coef, predictions, loglike and JSON round trip."""
        exog, endog = self.data_spector.exog, self.data_spector.endog
        self.model = self._make_model()
        self.model.fit(exog, endog)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-13.021, 2.8261, .09515, 2.378]]),
            decimal=3)

        # predict
        np.testing.assert_array_almost_equal(
            self.model.predict(exog),
            np.array(self._EXPECTED_PREDICTIONS),
            decimal=3)
        # loglike
        self.assertAlmostEqual(
            self.model.loglike(exog, endog),
            -12.8896334653335,
            places=3)
        # to_json
        json_dict = self.model.to_json('./tests/linear_models/DiscreteMNL/Binary/')
        self.assertEqual(json_dict['properties']['solver'], 'lbfgs')

        # from_json
        self.model_from_json = DiscreteMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_from_json.coef,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array([0, 1]), decimal=3)
        self.assertEqual(self.model.n_classes, 2)

    def test_lr_regularized(self):
        """Fit with alpha=.01: coef, predictions and loglike."""
        exog, endog = self.data_spector.exog, self.data_spector.endog
        self.model = self._make_model(alpha=.01)
        self.model.fit(exog, endog)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-10.66,   2.364,   0.064,   2.142]]),
            decimal=3)

        # predict
        np.testing.assert_array_almost_equal(
            self.model.predict(exog),
            np.array(self._EXPECTED_PREDICTIONS),
            decimal=3)
        # loglike
        self.assertAlmostEqual(
            self.model.loglike(exog, endog),
            -13.016861222748519,
            places=3)

    def test_lr_sample_weight_all_half(self):
        """A uniform 0.5 weight keeps the coefficients and halves the loglike."""
        exog, endog = self.data_spector.exog, self.data_spector.endog
        self.model = self._make_model()
        self.model.fit(exog, endog, sample_weight=.5)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-13.021, 2.8261, .09515, 2.378]]),
            decimal=3)
        # loglike: both operands are floats, so plain ``/`` is true division
        # on Python 2 and 3 alike and the ``old_div`` shim is not needed.
        self.assertAlmostEqual(
            self.model.loglike(exog, endog, sample_weight=.5),
            -12.8896334653335 / 2.,
            places=3)

    def test_lr_sample_weight_all_zero(self):
        """``fit`` must raise ``ValueError`` when its third argument is 0."""
        self.model = self._make_model()
        exog, endog = self.data_spector.exog, self.data_spector.endog
        with self.assertRaises(ValueError):
            self.model.fit(exog, endog, 0)

    def test_lr_sample_weight_half_zero_half_one(self):
        """Zero-weighting the tail must match fitting on the first 8 rows."""
        exog, endog = self.data_spector.exog, self.data_spector.endog
        len_half = 8
        weights = np.array([1] * len_half + [0] * (exog.shape[0] - len_half))
        self.model = self._make_model()
        self.model.fit(exog, endog, sample_weight=weights)
        self.model_half = self._make_model()
        self.model_half.fit(exog[:len_half], endog[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_half.coef,
            decimal=3)

    # corner cases
    def test_lr_two_data_point(self):
        """Fit on two rows under three scenarios and check the scores.

        Each scenario fits a fresh model with ``sample_weight=0.5``, checks
        the ``(1, 4)`` coefficient shape and compares the per-sample
        log-likelihood on the training rows with stored reference values.
        The model from the last scenario is then scored against a label
        vector containing ``2``, which is expected to yield ``-inf``.
        """
        # (alpha, training rows, expected per-sample log-likelihood)
        scenarios = (
            (.01, slice(4, 6), np.array([-0.226, -0.289])),  # with regularization
            (0, slice(4, 6), np.array([0, 0])),              # with no regularization
            (0, slice(3, 5), np.array([0, 0])),              # class in reverse
        )
        for alpha, rows, expected in scenarios:
            exog = self.data_spector.exog[rows, :]
            endog = self.data_spector.endog[rows]
            self.model = self._make_model(alpha=alpha)
            self.model.fit(exog, endog, sample_weight=0.5)
            # coef
            self.assertEqual(self.model.coef.shape, (1, 4))
            # loglike_per_sample
            np.testing.assert_array_almost_equal(
                self.model.loglike_per_sample(exog, endog),
                expected, decimal=3)

        # ``self.model`` is now the rows-3..4 fit from the last scenario.
        # ``np.inf`` replaces the ``np.Infinity`` alias removed in NumPy 2.0.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                self.data_spector.exog[3:5, :], np.array([0, 2])),
            np.array([0, -np.inf]), decimal=3)

    def test_lr_multicolinearty(self):
        """A duplicated column must score and predict like the single column."""
        first_col = self.data_spector.exog[:, 0:1]
        endog = self.data_spector.endog
        X = np.hstack([first_col, first_col])
        self.model_col = self._make_model()
        self.model_col.fit(X, endog, sample_weight=0.5)
        self.model = self._make_model()
        self.model.fit(first_col, endog, sample_weight=0.5)

        np.testing.assert_array_almost_equal(
            self.model_col.coef,
            np.array([[-9.703,  1.42002783,  1.42002783]]), decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, endog),
            self.model.loglike_per_sample(first_col, endog), decimal=3)
        # predict
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(first_col), decimal=3)
Esempio n. 17
0
class DiscreteMNLUnaryTests(unittest.TestCase):
    """Tests for ``DiscreteMNL`` when every training label is ``'foo'``."""

    @classmethod
    def setUpClass(cls):
        cls.data_spector = sm.datasets.spector.load()
        # One constant label per spector row.
        cls.y = np.array(['foo'] * cls.data_spector.endog.shape[0])

    @staticmethod
    def _make_model():
        """Return an unfitted, unregularized lbfgs ``DiscreteMNL``."""
        return DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)

    def test_lr(self):
        """Full-data fit: zero coef, constant prediction, 0 / -inf loglike."""
        exog = self.data_spector.exog
        self.model = self._make_model()
        self.model.fit(exog, self.y)
        # coefficient
        np.testing.assert_array_equal(
            self.model.coef,
            np.zeros((4, 1)))

        # predict
        np.testing.assert_array_equal(
            self.model.predict(exog),
            np.array(['foo'] * self.data_spector.endog.shape[0]))
        # loglike_per_sample
        # ``np.inf`` replaces the ``np.Infinity`` alias removed in NumPy 2.0.
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(
                exog, np.array(['bar'] * 16 + ['foo'] * 16)),
            np.array([-np.inf] * 16 + [0] * 16))

    def test_lr_sample_weight_all_half(self):
        """A uniform 0.5 weight still gives zero coef and zero loglike."""
        exog = self.data_spector.exog
        self.model = self._make_model()
        self.model.fit(exog, self.y, sample_weight=.5)
        # coefficient
        np.testing.assert_array_equal(
            self.model.coef,
            np.zeros((4, 1)))
        # loglike
        self.assertEqual(
            self.model.loglike(exog, self.y, sample_weight=.5), 0)

    # corner cases
    def test_lr_one_data_point(self):
        """Fit on a single row (index 4) and score rows 4..5 against it."""
        # no regularization (alpha=0)
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog[4:5, :],
                       self.y[4:5, ], sample_weight=0.5)
        # coef
        np.testing.assert_array_equal(
            self.model.coef,
            np.zeros((4, 1)))
        # loglike_per_sample
        two_rows = self.data_spector.exog[4:6, :]
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(two_rows, np.array(['foo', 'foo'])),
            np.array([0, 0]), decimal=3)

        # ``np.inf`` replaces the ``np.Infinity`` alias removed in NumPy 2.0.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(two_rows, np.array(['foo', 'bar'])),
            np.array([0, -np.inf]), decimal=3)
Esempio n. 18
0
class DiscreteMNLMultinomialTests(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.data_anes96 = sm.datasets.anes96.load()

    def test_lr(self):
        self.model = DiscreteMNL(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_anes96.exog, self.data_anes96.endog)
        # coefficient
        # predict
        self.assertEqual(
            np.sum(self.model.predict(self.data_anes96.exog) ==
                   self.data_anes96.endog), 372)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_anes96.exog, self.data_anes96.endog),
            -1461.9227472481984,
            places=3)
        # to_json
        json_dict = self.model.to_json('./tests/linear_models/DiscreteMNL/Multinomial/')
        self.assertEqual(json_dict['properties']['solver'], 'newton-cg')

        # from_json
        self.model_from_json = DiscreteMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_from_json.coef,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array(list(range(7))), decimal=3)
        self.assertEqual(self.model.n_classes, 7)

    def test_lr_regularized(self):
        self.model = DiscreteMNL(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=10, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_anes96.exog, self.data_anes96.endog)
        # predict
        self.assertEqual(
            np.sum(self.model.predict(self.data_anes96.exog) ==
                   self.data_anes96.endog), 333)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_anes96.exog, self.data_anes96.endog),
            -1540.888456277886,
            places=3)

    def test_lr_sample_weight_all_half(self):
        self.model_half = DiscreteMNL(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model_half.fit(self.data_anes96.exog, self.data_anes96.endog, sample_weight=.5)
        self.model = DiscreteMNL(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_anes96.exog, self.data_anes96.endog)
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef, self.model_half.coef, decimal=3)
        # predict
        self.assertEqual(
            np.sum(self.model.predict(self.data_anes96.exog) ==
                   self.data_anes96.endog), 372)
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_anes96.exog, self.data_anes96.endog, sample_weight=.5),
            old_div(-1461.92274725, 2.),
            places=3)

    def test_lr_sample_weight_all_zero(self):
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.assertRaises(ValueError, self.model.fit,
                          self.data_anes96.exog, self.data_anes96.endog, 0)

    def test_lr_sample_weight_half_zero_half_one(self):
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        len_half = 500
        self.model.fit(self.data_anes96.exog, self.data_anes96.endog,
                       sample_weight=np.array([1] * len_half +
                                              [0] * (self.data_anes96.exog.shape[0] - len_half)))
        self.model_half = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model_half.fit(self.data_anes96.exog[:len_half], self.data_anes96.endog[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_half.coef,
            decimal=3)

    # corner cases
    def test_lr_three_data_point(self):
        # with regularization
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=.1, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_anes96.exog[6:9, :],
                       self.data_anes96.endog[6:9, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (3, 6))
        # loglike_per_sample
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :], np.array([1, 4, 3])),
            np.array([-0.015, -0.089, -0.095]), decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :], np.array([3, 1, 4])),
            np.array([-4.2, -5.046, -2.827]), decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :], np.array([3, 0, 5])),
            np.array([-4.2, -np.Infinity,  -np.Infinity]), decimal=3)

    def test_lr_multicolinearty(self):
        self.model_col = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        X = np.hstack([self.data_anes96.exog[:, 0:1], self.data_anes96.exog[:, 0:1]])
        self.model_col.fit(X,
                           self.data_anes96.endog, sample_weight=0.5)
        self.model = DiscreteMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, classes=None)
        self.model.fit(self.data_anes96.exog[:, 0:1],
                       self.data_anes96.endog, sample_weight=0.5)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, self.data_anes96.endog),
            self.model.loglike_per_sample(self.data_anes96.exog[:, 0:1],
                                          self.data_anes96.endog), decimal=3)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(self.data_anes96.exog[:, 0:1]), decimal=3)