def test_lr_disturbed_sample_weight_half_zero_half_one(self):
     """Compare a 0/1-weighted fit with a fit on the weighted rows only.

     One model is fitted on every row of ``data_anes96.exog`` with weight 1
     on the first ``len_half`` rows and weight 0 on the remaining rows; a
     second model is fitted on the first ``len_half`` rows alone.  Their
     coefficients are expected to agree to 3 decimals.
     """
     len_half = 500
     hyper_params = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                         reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                         max_iter=100, coef=None, stderr=None,
                         n_classes=None)

     # Full data set; the tail is masked out through zero weights.
     self.model = CrossEntropyMNL(**hyper_params)
     n_masked = self.data_anes96.exog.shape[0] - len_half
     weights = np.array([1] * len_half + [0] * n_masked)
     self.model.fit(self.data_anes96.exog,
                    self.y_disturbed,
                    sample_weight=weights)

     # Reference fit: only the rows that carried weight 1 above.
     self.model_half = CrossEntropyMNL(**hyper_params)
     self.model_half.fit(self.data_anes96.exog[:len_half],
                         self.y_disturbed[:len_half])

     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef, self.model_half.coef, decimal=3)
 def test_label_encoder(self):
     """Exercise ``CrossEntropyMNL._label_encoder`` on a 3x3 input.

     Two cases are run: one-hot ``y`` with unit weights, and fractional
     ``y`` with a non-uniform ``sample_weight``.  In both cases the
     returned X is expected to hold every input row three times, the
     returned labels to cycle through 0, 1, 2, and the returned weights
     to equal the literal arrays listed per case.
     """
     expected_x = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3],
                            [4, 5, 6], [4, 5, 6], [4, 5, 6],
                            [7, 8, 9], [7, 8, 9], [7, 8, 9]])
     expected_labels = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2])

     # Each case: (y, sample_weight, expected repeated weights)
     cases = (
         (np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
          np.ones(3),
          np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])),
         # with sample_weight
         (np.array([[0.5, 0.25, 0.25],
                    [0.25, 0.5, 0.25],
                    [0.25, 0.25, 0.5]]),
          np.array([0.25, 0.5, 0.25]),
          np.array([0.125, 0.0625, 0.0625,
                    0.125, 0.25, 0.125,
                    0.0625, 0.0625, 0.125])),
     )
     for y, sample_weight, expected_weight in cases:
         # A fresh x per case, as each case builds its own input.
         x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
         encoded_x, encoded_y, encoded_weight = \
             CrossEntropyMNL._label_encoder(x, y, sample_weight)
         np.testing.assert_array_equal(encoded_x, expected_x)
         np.testing.assert_array_equal(encoded_y, expected_labels)
         np.testing.assert_array_equal(encoded_weight, expected_weight)
 def test_lr_one_data_point(self):
     """Fit on a single row and check coefficients and log-likelihoods.

     The model is fitted without regularization (``alpha=0``) on row 4 of
     ``data_spector.exog`` with ``sample_weight=0.5``.  The coefficients
     are expected to be all zero with shape (4, 1).  On rows 4-5,
     ``loglike_per_sample`` is expected to be 0 for a label of 1 and
     ``-inf`` for a label of 0 (compared to 3 decimals).
     """
     # no regularization: alpha=0
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=0,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     self.model.fit(self.data_spector.exog[4:5, :],
                    self.y[4:5, ],
                    sample_weight=0.5)
     # coef
     np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))
     # loglike_per_sample
     # np.inf is used because the np.Infinity alias was removed in NumPy 2.0.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :],
         np.array([1, 0]).reshape(-1, 1)),
                                          np.array([0, -np.inf]),
                                          decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :],
         np.array([1, 1]).reshape(-1, 1)),
                                          np.array([0, 0]),
                                          decimal=3)
    def test_lr(self):
        """Fit the unregularized lbfgs model on ``data_spector`` and verify it.

        Expects all-zero coefficients of shape (4, 1), a prediction of 0 for
        every row, and a per-sample log-likelihood of 0 where the supplied
        label is 1 and ``-inf`` where it is 0.
        """
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=0,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))

        # predict
        np.testing.assert_array_equal(
            self.model.predict(self.data_spector.exog),
            np.array([0] * self.data_spector.endog.shape[0]))
        # loglike/_per_sample
        # np.inf is used because the np.Infinity alias was removed in
        # NumPy 2.0.
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(
                self.data_spector.exog,
                np.array([1] * 16 + [0] * 16).reshape(-1, 1)),
            np.array([0] * 16 + [-np.inf] * 16))
 def test_lr_disturbed_two_data_point(self):
     """Fit an l2-regularized model on two rows and check its likelihoods.

     The model (``alpha=.1``) is fitted on rows 4-5 of
     ``data_spector.exog`` against ``y_disturbed`` with
     ``sample_weight=0.5``.  The test checks the coefficient shape (1, 4)
     and ``loglike_per_sample`` for the training labels and for two
     hand-written label matrices whose first row is all zeros (expected
     ``-inf``).
     """
     # with regularization
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=.1,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     self.model.fit(self.data_spector.exog[4:6, :],
                    self.y_disturbed[4:6, ],
                    sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (1, 4))
     # loglike_per_sample on the training labels
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], self.y_disturbed[4:6, ]),
                                          np.array([-0.503, -0.662]),
                                          decimal=3)
     # loglike_per_sample on explicit label matrices
     # np.inf is used because the np.Infinity alias was removed in NumPy 2.0.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], np.array([[0, 0], [0.99, 0.01]])),
                                          np.array([-np.inf, -0.662]),
                                          decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], np.array([[0, 0], [0.01, 0.99]])),
                                          np.array([-np.inf, -0.725]),
                                          decimal=3)
    def test_lr_disturbed_sample_weight_all_half(self):
        """Fit on ``y_disturbed`` with every sample weighted 0.5.

        Coefficients and predictions are compared with reference values to
        3 decimals, and the 0.5-weighted log-likelihood is expected to be
        half of -13.366314173353134.
        """
        exog = self.data_spector.exog
        options = {
            'solver': 'lbfgs',
            'fit_intercept': True,
            'est_stderr': True,
            'reg_method': 'l2',
            'alpha': 0,
            'l1_ratio': 0,
            'tol': 1e-4,
            'max_iter': 100,
            'coef': None,
            'stderr': None,
            'n_classes': None,
        }
        self.model = CrossEntropyMNL(**options)
        self.model.fit(exog, self.y_disturbed, sample_weight=.5)

        # coefficient
        expected_coef = np.array([[-12.327, 2.686, 0.089, 2.258]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array(
            (0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
             0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.,
             1., 1., 1., 0.))
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        weighted_loglike = self.model.loglike(
            exog, self.y_disturbed, sample_weight=.5)
        # float / float: plain true division
        self.assertAlmostEqual(
            weighted_loglike, -13.366314173353134 / 2., places=3)
    def test_lr(self):
        """Fit a newton-cg model with ``alpha=10`` and check it end to end.

        Verifies the number of predictions equal to ``data_anes96.endog``
        (333), the log-likelihood, and that a ``to_json`` / ``from_json``
        round trip keeps the solver name and the coefficients.  Also
        expects ``classes`` to be 0..6 and ``n_classes`` to be 7.
        """
        exog = self.data_anes96.exog
        self.model = CrossEntropyMNL(solver='newton-cg',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=10,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(exog, self.y)

        # predict
        n_matches = np.sum(
            self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_matches, 333)

        # loglike/_per_sample
        loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(loglike, -1540.888458338286, places=3)

        # to_json
        json_dir = './tests/linear_models/CrossentropyMNL/Multinomial/'
        json_dict = self.model.to_json(json_dir)
        self.assertEqual(json_dict['properties']['solver'], 'newton-cg')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_from_json.coef,
                                             decimal=3)
        np.testing.assert_array_almost_equal(self.model.classes,
                                             np.array(list(range(7))),
                                             decimal=3)
        self.assertEqual(self.model.n_classes, 7)
    def test_lr_regularized(self):
        """Fit an l2-regularized (``alpha=.01``) lbfgs model and check it.

        Coefficients and predictions on ``data_spector.exog`` are compared
        with reference values to 3 decimals, as is the log-likelihood.
        """
        exog = self.data_spector.exog
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=.01, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None,
                        n_classes=None)
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)

        # coefficient
        reference_coef = np.array([[-10.66, 2.364, 0.064, 2.142]])
        np.testing.assert_array_almost_equal(
            self.model.coef, reference_coef, decimal=3)

        # predict
        reference_labels = np.array(
            (0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
             0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.,
             1., 1., 1., 0.))
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), reference_labels, decimal=3)

        # loglike/_per_sample
        loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(loglike, -13.016861222748515, places=3)
 def test_label_encoder(self):
     """Check the three arrays returned by ``_label_encoder``.

     The encoder is called on a fixed 3x3 ``x`` twice: with one-hot labels
     and unit weights, then with fractional labels and explicit sample
     weights.  The returned X, labels and weights are compared exactly
     with literal expectations.
     """
     def encode_and_check(y, sample_weight, expected_weight):
         # Runs one case against the shared X / label expectations.
         x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
         out_x, out_y, out_weight = CrossEntropyMNL._label_encoder(
             x, y, sample_weight)
         np.testing.assert_array_equal(
             out_x,
             np.array([
                 [1, 2, 3], [1, 2, 3], [1, 2, 3],
                 [4, 5, 6], [4, 5, 6], [4, 5, 6],
                 [7, 8, 9], [7, 8, 9], [7, 8, 9]]))
         np.testing.assert_array_equal(
             out_y, np.array([0, 1, 2, 0, 1, 2, 0, 1, 2]))
         np.testing.assert_array_equal(out_weight, expected_weight)

     # one-hot labels, unit weights
     encode_and_check(
         np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
         np.ones(3),
         np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]))

     # with sample_weight
     encode_and_check(
         np.array([[0.5, 0.25, 0.25], [0.25, 0.5, 0.25], [0.25, 0.25, 0.5]]),
         np.array([0.25, 0.5, 0.25]),
         np.array([0.125, 0.0625, 0.0625, 0.125, 0.25, 0.125,
                   0.0625, 0.0625, 0.125]))
Example #10
0
    def test_lr_disturbed_multicolinearty(self):
        """Compare a fit on a duplicated column with a fit on the single column.

        ``model_col`` is fitted on column 0 of ``data_spector.exog`` stacked
        with itself; ``model`` is fitted on column 0 alone.  Both use
        ``y_disturbed`` and ``sample_weight=0.5``.  The duplicated-column
        coefficients are compared with reference values, and per-sample
        log-likelihoods and predictions of the two models are expected to
        agree to 3 decimals.
        """
        single = self.data_spector.exog[:, 0:1]
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None,
                        n_classes=None)

        # Model on two identical columns.
        self.model_col = CrossEntropyMNL(**settings)
        X = np.hstack([self.data_spector.exog[:, 0:1],
                       self.data_spector.exog[:, 0:1]])
        self.model_col.fit(X, self.y_disturbed, sample_weight=0.5)

        # Model on the single column.
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(single, self.y_disturbed, sample_weight=0.5)

        expected_coef = np.array([[-9.359,  1.37,  1.37]])
        np.testing.assert_array_almost_equal(
            self.model_col.coef, expected_coef, decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, self.y_disturbed),
            self.model.loglike_per_sample(single, self.y_disturbed),
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(single),
            decimal=3)
 def test_lr_sample_weight_all_zero(self):
     """``fit`` with 0 as its third positional argument must raise ValueError."""
     settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                     reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                     max_iter=100, coef=None, stderr=None, n_classes=None)
     self.model = CrossEntropyMNL(**settings)
     fit = self.model.fit
     exog, targets = self.data_spector.exog, self.y
     with self.assertRaises(ValueError):
         fit(exog, targets, 0)
    def test_train_no_covariates(self):
        """Train a 2-state ``UnSupervisedIOHMM`` with no covariates.

        The initial and transition models are ``CrossEntropyMNL`` (lbfgs,
        l2) and the single emission model is ``OLS`` on the ``'rt'`` output.
        After ``train()``, the emission coefficients and dispersions, the
        transition coefficients and the transition probabilities are
        compared with reference values.
        """
        self.model = UnSupervisedIOHMM(num_states=2,
                                       max_EM_iter=100,
                                       EM_tol=1e-6)
        initial = CrossEntropyMNL(solver='lbfgs', reg_method='l2')
        transition = CrossEntropyMNL(solver='lbfgs', reg_method='l2')
        self.model.set_models(model_initial=initial,
                              model_transition=transition,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=[],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([self.data_speed])
        self.model.train()

        # emission coefficients, checked per state
        for state, expected in ((0, 5.5), (1, 6.4)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected]]),
                decimal=1)

        # emission dispersion, checked per state
        for state, expected in ((0, 0.037), (1, 0.063)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected]]),
                decimal=2)

        # transition coefficients
        for state, expected in ((1, 2.4), (0, -2)):
            np.testing.assert_array_almost_equal(
                self.model.model_transition[state].coef,
                np.array([[expected]]),
                decimal=1)

        # transition probabilities, evaluated on an empty covariate row
        for state, expected_row in ((1, [0.08, 0.92]), (0, [0.88, 0.12])):
            log_proba = self.model.model_transition[state].predict_log_proba(
                np.array([[]]))
            np.testing.assert_array_almost_equal(
                np.exp(log_proba), np.array([expected_row]), decimal=2)
Example #13
0
 def test_lr_sample_weight_all_zero(self):
     """``DiscreteMNL.fit`` with 0 as third positional argument raises ValueError."""
     self.model = DiscreteMNL(solver='lbfgs',
                              fit_intercept=True,
                              est_stderr=True,
                              reg_method='l2',
                              alpha=0,
                              l1_ratio=0,
                              tol=1e-4,
                              max_iter=100,
                              coef=None,
                              stderr=None,
                              classes=None)
     fit_args = (self.data_anes96.exog, self.y_disturbed, 0)
     self.assertRaises(ValueError, self.model.fit, *fit_args)
Example #14
0
 def test_lr_sample_weight_all_zero(self):
     """``CrossEntropyMNL.fit`` with 0 as third positional argument raises ValueError."""
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=0,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     fit = self.model.fit
     exog, targets = self.data_spector.exog, self.y
     with self.assertRaises(ValueError):
         fit(exog, targets, 0)
    def test_train_covariates_for_transition(self):
        """Train a 2-state ``UnSupervisedIOHMM`` with ``'Pacc'`` on transitions.

        The initial and transition models are ``CrossEntropyMNL``
        (newton-cg, l2) and the emission model is ``OLS`` on ``'rt'``.
        After ``train()``, the emission coefficients and dispersions are
        compared with reference values, and the column sums of the
        transition probabilities over ``inp_transitions_all_sequences`` are
        compared with reference totals.
        """
        self.model = UnSupervisedIOHMM(num_states=2,
                                       max_EM_iter=100,
                                       EM_tol=1e-6)
        initial = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        transition = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        self.model.set_models(model_initial=initial,
                              model_transition=transition,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=['Pacc'],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([self.data_speed])
        self.model.train()

        # emission coefficients, checked per state
        for state, expected in ((0, 5.5), (1, 6.4)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected]]),
                decimal=1)

        # emission dispersion, checked per state
        for state, expected in ((0, 0.036), (1, 0.063)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected]]),
                decimal=2)

        # transition: probabilities summed over all rows, per state
        for state, expected_totals in ((0, [312, 126]), (1, [112, 326])):
            log_proba = self.model.model_transition[state].predict_log_proba(
                self.model.inp_transitions_all_sequences)
            np.testing.assert_array_almost_equal(
                np.exp(log_proba).sum(axis=0),
                np.array(expected_totals),
                decimal=0)
    def test_lr(self):
        """Fit the unregularized lbfgs model on ``data_spector`` end to end.

        Checks coefficients, predictions and log-likelihood against
        reference values, then verifies that a ``to_json`` / ``from_json``
        round trip keeps the solver name and the coefficients.  Also
        expects ``classes`` to be [0, 1] and ``n_classes`` to be 2.
        """
        exog = self.data_spector.exog
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None,
                        n_classes=None)
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)

        # coefficient
        reference_coef = np.array([[-13.021, 2.8261, .09515, 2.378]])
        np.testing.assert_array_almost_equal(
            self.model.coef, reference_coef, decimal=3)

        # predict
        reference_labels = np.array(
            (0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
             0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.,
             1., 1., 1., 0.))
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), reference_labels, decimal=3)

        # loglike/_per_sample
        loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(loglike, -12.8896334653335, places=3)

        # to_json
        json_dir = './tests/linear_models/CrossentropyMNL/Binary/'
        json_dict = self.model.to_json(json_dir)
        self.assertEqual(json_dict['properties']['solver'], 'lbfgs')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array([0, 1]), decimal=3)
        self.assertEqual(self.model.n_classes, 2)
    def test_lr_multicolinearty(self):
        """Compare a fit on a duplicated column with a fit on the single column.

        ``model_col`` is fitted on column 0 of ``data_spector.exog`` stacked
        with itself; ``model`` is fitted on column 0 alone.  Both use
        ``self.y`` and ``sample_weight=0.5``.  The duplicated-column
        coefficients are compared with reference values, and per-sample
        log-likelihoods and predictions of the two models are expected to
        agree to 3 decimals.
        """
        def new_model():
            # Identical hyper-parameters for both fits.
            return CrossEntropyMNL(solver='lbfgs',
                                   fit_intercept=True,
                                   est_stderr=True,
                                   reg_method='l2',
                                   alpha=0,
                                   l1_ratio=0,
                                   tol=1e-4,
                                   max_iter=100,
                                   coef=None,
                                   stderr=None,
                                   n_classes=None)

        # Model on two identical columns.
        self.model_col = new_model()
        X = np.hstack(
            [self.data_spector.exog[:, 0:1], self.data_spector.exog[:, 0:1]])
        self.model_col.fit(X, self.y, sample_weight=0.5)

        # Model on the single column.
        self.model = new_model()
        single = self.data_spector.exog[:, 0:1]
        self.model.fit(single, self.y, sample_weight=0.5)

        expected_coef = np.array([[-9.703, 1.42002783, 1.42002783]])
        np.testing.assert_array_almost_equal(
            self.model_col.coef, expected_coef, decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, self.y),
            self.model.loglike_per_sample(single, self.y),
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(single),
            decimal=3)
Example #18
0
 def test_lr_disturbed_sample_weight_half_zero_half_one(self):
     """Compare a 0/1-weighted fit with a fit on the weighted rows only.

     ``model`` sees all rows of ``data_anes96.exog`` with weight 1 on the
     first ``len_half`` rows and 0 afterwards; ``model_half`` sees only
     the first ``len_half`` rows.  Coefficients must agree to 3 decimals.
     """
     def new_model():
         # Identical hyper-parameters for both fits.
         return CrossEntropyMNL(solver='lbfgs',
                                fit_intercept=True,
                                est_stderr=True,
                                reg_method='l2',
                                alpha=0,
                                l1_ratio=0,
                                tol=1e-4,
                                max_iter=100,
                                coef=None,
                                stderr=None,
                                n_classes=None)

     len_half = 500

     self.model = new_model()
     n_zero = self.data_anes96.exog.shape[0] - len_half
     mask = np.array([1] * len_half + [0] * n_zero)
     self.model.fit(self.data_anes96.exog,
                    self.y_disturbed,
                    sample_weight=mask)

     self.model_half = new_model()
     self.model_half.fit(self.data_anes96.exog[:len_half],
                         self.y_disturbed[:len_half])

     # coefficient
     np.testing.assert_array_almost_equal(self.model.coef,
                                          self.model_half.coef,
                                          decimal=3)
 def test_lr_disturbed_sample_weight_all_half(self):
     """Compare a fit with every weight 0.5 against an unweighted fit.

     ``model_half`` is fitted with ``sample_weight=.5`` and ``model``
     without weights, both on ``data_anes96.exog`` / ``y_disturbed``.
     Their coefficients must agree to 3 decimals; ``model_half`` must
     reproduce 367 entries of ``data_anes96.endog``; and the 0.5-weighted
     log-likelihood of ``model`` must be half of -1516.50148.
     """
     exog = self.data_anes96.exog
     settings = dict(solver='newton-cg', fit_intercept=True,
                     est_stderr=True, reg_method='l2', alpha=0, l1_ratio=0,
                     tol=1e-4, max_iter=100, coef=None, stderr=None,
                     n_classes=None)

     # Uniformly down-weighted fit.
     self.model_half = CrossEntropyMNL(**settings)
     self.model_half.fit(exog, self.y_disturbed, sample_weight=.5)

     # Unweighted fit.
     self.model = CrossEntropyMNL(**settings)
     self.model.fit(exog, self.y_disturbed)

     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef, self.model_half.coef, decimal=3)

     # predict
     n_matches = np.sum(
         self.model_half.predict(exog) == self.data_anes96.endog)
     self.assertEqual(n_matches, 367)

     # loglike/_per_sample
     weighted_loglike = self.model.loglike(
         exog, self.y_disturbed, sample_weight=.5)
     # float / float: plain true division
     self.assertAlmostEqual(weighted_loglike, -1516.50148 / 2., places=3)
 def test_lr_sample_weight_all_half(self):
     """Fit on ``self.y`` with every sample weighted 0.5.

     Expects all-zero coefficients of shape (4, 1) and a 0.5-weighted
     log-likelihood of exactly 0.
     """
     exog = self.data_spector.exog
     settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                     reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                     max_iter=100, coef=None, stderr=None, n_classes=None)
     self.model = CrossEntropyMNL(**settings)
     self.model.fit(exog, self.y, sample_weight=.5)

     # coefficient
     np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))

     # loglike/_per_sample
     weighted_loglike = self.model.loglike(exog, self.y, sample_weight=.5)
     self.assertEqual(weighted_loglike, 0)
Example #21
0
 def test_lr_disturbed_sample_weight_all_half(self):
     """Compare a fit with every weight 0.5 against an unweighted fit.

     Both models are newton-cg fits on ``data_anes96.exog`` /
     ``y_disturbed``; only ``model_half`` receives ``sample_weight=.5``.
     Coefficients must agree to 3 decimals, ``model_half`` must reproduce
     367 entries of ``data_anes96.endog``, and the 0.5-weighted
     log-likelihood of ``model`` must be half of -1516.50148.
     """
     def new_model():
         # Identical hyper-parameters for both fits.
         return CrossEntropyMNL(solver='newton-cg',
                                fit_intercept=True,
                                est_stderr=True,
                                reg_method='l2',
                                alpha=0,
                                l1_ratio=0,
                                tol=1e-4,
                                max_iter=100,
                                coef=None,
                                stderr=None,
                                n_classes=None)

     self.model_half = new_model()
     self.model_half.fit(self.data_anes96.exog,
                         self.y_disturbed,
                         sample_weight=.5)
     self.model = new_model()
     self.model.fit(self.data_anes96.exog, self.y_disturbed)

     # coefficient
     np.testing.assert_array_almost_equal(self.model.coef,
                                          self.model_half.coef,
                                          decimal=3)
     # predict
     predicted = self.model_half.predict(self.data_anes96.exog)
     self.assertEqual(np.sum(predicted == self.data_anes96.endog), 367)
     # loglike/_per_sample
     weighted_loglike = self.model.loglike(self.data_anes96.exog,
                                           self.y_disturbed,
                                           sample_weight=.5)
     # float / float: plain true division
     self.assertAlmostEqual(weighted_loglike, -1516.50148 / 2., places=3)
Example #22
0
 def test_lr_sample_weight_all_half(self):
     """Fit on ``self.y`` with ``sample_weight=.5`` and check the result.

     The coefficients are expected to be all zero with shape (4, 1) and
     the 0.5-weighted log-likelihood to be exactly 0.
     """
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=0,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
     # coefficient
     expected_coef = np.zeros((4, 1))
     np.testing.assert_array_equal(self.model.coef, expected_coef)
     # loglike/_per_sample
     weighted_loglike = self.model.loglike(self.data_spector.exog,
                                           self.y,
                                           sample_weight=.5)
     self.assertEqual(weighted_loglike, 0)
    def test_lr(self):
        """Fit a newton-cg model with ``alpha=10`` on ``data_anes96`` end to end.

        Checks the number of predictions equal to ``data_anes96.endog``
        (333) and the log-likelihood, then verifies that a ``to_json`` /
        ``from_json`` round trip keeps the solver name and coefficients.
        Also expects ``classes`` to be 0..6 and ``n_classes`` to be 7.
        """
        exog = self.data_anes96.exog
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=10,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)

        # predict
        predicted = self.model.predict(exog)
        self.assertEqual(np.sum(predicted == self.data_anes96.endog), 333)

        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(exog, self.y), -1540.888458338286, places=3)

        # to_json
        json_dict = self.model.to_json(
            './tests/linear_models/CrossentropyMNL/Multinomial/')
        self.assertEqual(json_dict['properties']['solver'], 'newton-cg')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        expected_classes = np.array(list(range(7)))
        np.testing.assert_array_almost_equal(
            self.model.classes, expected_classes, decimal=3)
        self.assertEqual(self.model.n_classes, 7)
 def test_lr_disturbed_regularized(self):
     # Fit on the disturbed anes96 targets with newton-cg and an L2 penalty
     # (alpha=.5), then check the hit count and the total log-likelihood.
     config = {
         'solver': 'newton-cg',
         'fit_intercept': True,
         'est_stderr': True,
         'reg_method': 'l2',
         'alpha': .5,
         'l1_ratio': 0,
         'tol': 1e-4,
         'max_iter': 100,
         'coef': None,
         'stderr': None,
         'n_classes': None,
     }
     self.model = CrossEntropyMNL(**config)
     features = self.data_anes96.exog
     self.model.fit(features, self.y_disturbed)
     # predict
     hits = np.sum(self.model.predict(features) == self.data_anes96.endog)
     self.assertEqual(hits, 366)
     # loglike/_per_sample
     self.assertAlmostEqual(
         self.model.loglike(features, self.y_disturbed),
         -1519.9521131193064,
         places=3)
 def test_lr_sample_weight_all_zero(self):
     # Fitting with a scalar sample weight of 0 is expected to raise
     # ValueError.
     config = dict(
         solver='lbfgs',
         fit_intercept=True,
         est_stderr=True,
         reg_method='l2',
         alpha=0,
         l1_ratio=0,
         tol=1e-4,
         max_iter=100,
         coef=None,
         stderr=None,
         classes=None,
     )
     self.model = DiscreteMNL(**config)
     features = self.data_anes96.exog
     targets = self.y_disturbed
     with self.assertRaises(ValueError):
         # third positional argument is the sample weight
         self.model.fit(features, targets, 0)
Example #26
0
    def test_lr(self):
        # Fit on the spector data with lbfgs and alpha=0, then check the
        # coefficients, predictions, log-likelihood and the JSON round trip.
        settings = dict(
            solver='lbfgs',
            fit_intercept=True,
            est_stderr=True,
            reg_method='l2',
            alpha=0,
            l1_ratio=0,
            tol=1e-4,
            max_iter=100,
            coef=None,
            stderr=None,
            n_classes=None,
        )
        self.model = CrossEntropyMNL(**settings)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y)

        # coefficient
        expected_coef = np.array([[-13.021, 2.8261, .09515, 2.378]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array([
            0., 0., 0., 0., 1., 0., 0., 0.,
            0., 1., 0., 0., 0., 0., 0., 0.,
            0., 0., 1., 1., 0., 1., 0., 1.,
            1., 0., 1., 0., 1., 1., 1., 0.,
        ])
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -12.8896334653335, places=3)

        # to_json
        json_dict = self.model.to_json(
            './tests/linear_models/CrossentropyMNL/Binary/')
        self.assertEqual(json_dict['properties']['solver'], 'lbfgs')

        # from_json: the rebuilt model must carry the same coefficients
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array([0, 1]), decimal=3)
        self.assertEqual(self.model.n_classes, 2)
 def test_lr_sample_weight_all_half(self):
     # Fit with a uniform sample weight of .5: the coefficients are compared
     # with the same reference values used for the unweighted fit, and the
     # weighted log-likelihood with half of the unweighted reference value.
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=0,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef,
         np.array([[-13.021, 2.8261, .09515, 2.378]]),
         decimal=3)
     # loglike/_per_sample
     # Plain true division: both operands are floats, so the Python 2
     # compatibility shim old_div() is not needed here.
     self.assertAlmostEqual(self.model.loglike(self.data_spector.exog,
                                               self.y,
                                               sample_weight=.5),
                            -12.8896334653335 / 2.,
                            places=3)
Example #28
0
 def test_lr_disturbed_regularized(self):
     # newton-cg fit on the disturbed anes96 targets with an L2 penalty
     # (alpha=.5); checks the hit count and the total log-likelihood.
     options = dict(
         solver='newton-cg',
         fit_intercept=True,
         est_stderr=True,
         reg_method='l2',
         alpha=.5,
         l1_ratio=0,
         tol=1e-4,
         max_iter=100,
         coef=None,
         stderr=None,
         n_classes=None,
     )
     self.model = CrossEntropyMNL(**options)
     exog = self.data_anes96.exog
     self.model.fit(exog, self.y_disturbed)
     # predict
     matches = self.model.predict(exog) == self.data_anes96.endog
     self.assertEqual(np.sum(matches), 366)
     # loglike/_per_sample
     total_loglike = self.model.loglike(exog, self.y_disturbed)
     self.assertAlmostEqual(total_loglike, -1519.9521131193064, places=3)
 def test_lr_disturbed(self):
     # newton-cg fit on the disturbed anes96 targets with an L2 penalty
     # (alpha=10); checks the hit count and the total log-likelihood.
     config = {
         'solver': 'newton-cg',
         'fit_intercept': True,
         'est_stderr': True,
         'reg_method': 'l2',
         'alpha': 10,
         'l1_ratio': 0,
         'tol': 1e-4,
         'max_iter': 100,
         'coef': None,
         'stderr': None,
         'n_classes': None,
     }
     self.model = CrossEntropyMNL(**config)
     features = self.data_anes96.exog
     self.model.fit(features, self.y_disturbed)
     # predict
     hits = np.sum(self.model.predict(features) == self.data_anes96.endog)
     self.assertEqual(hits, 335)
     # loglike/_per_sample
     self.assertAlmostEqual(
         self.model.loglike(features, self.y_disturbed),
         -1580.5280532302786,
         places=3)
 def test_lr_disturbed_three_data_point(self):
     # Corner case: fit on only three anes96 rows (6..8) with an L2 penalty
     # (alpha=.1) and a uniform sample weight of 0.5, then check the
     # coefficient shape and per-sample log-likelihoods for several targets.
     self.model = CrossEntropyMNL(solver='lbfgs',
                                  fit_intercept=True,
                                  est_stderr=True,
                                  reg_method='l2',
                                  alpha=.1,
                                  l1_ratio=0,
                                  tol=1e-4,
                                  max_iter=100,
                                  coef=None,
                                  stderr=None,
                                  n_classes=None)
     self.model.fit(self.data_anes96.exog[6:9, :],
                    self.y_disturbed[6:9, ],
                    sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (7, 6))
     # loglike_per_sample
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :], self.y_disturbed[6:9, ]),
                                          np.array([-0.336, -0.389,
                                                    -0.398]),
                                          decimal=3)
     # `classes` is passed by keyword: it is keyword-only in current
     # scikit-learn releases, and the keyword form also works on old ones.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :],
         label_binarize([3, 1, 4], classes=list(range(7)))),
                                          np.array([-3.415, -4.506,
                                                    -2.367]),
                                          decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :],
         label_binarize([3, 0, 5], classes=list(range(7)))),
                                          np.array([-3.415, -4.492,
                                                    -4.301]),
                                          decimal=3)
Example #31
0
 def test_lr_sample_weight_all_half(self):
     # Fit with a uniform sample weight of .5: the coefficients are compared
     # with the same reference values used for the unweighted fit, and the
     # weighted log-likelihood with half of the unweighted reference value.
     self.model = CrossEntropyMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None, n_classes=None)
     self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
     # coefficient
     np.testing.assert_array_almost_equal(
         self.model.coef,
         np.array([[-13.021, 2.8261, .09515, 2.378]]),
         decimal=3)
     # loglike/_per_sample
     # Plain true division: both operands are floats, so the Python 2
     # compatibility shim old_div() is not needed here.
     self.assertAlmostEqual(
         self.model.loglike(self.data_spector.exog, self.y, sample_weight=.5),
         -12.8896334653335 / 2.,
         places=3)
Example #32
0
 def test_lr_disturbed(self):
     # newton-cg fit on the disturbed anes96 targets with an L2 penalty
     # (alpha=10); checks the hit count and the total log-likelihood.
     options = dict(
         solver='newton-cg',
         fit_intercept=True,
         est_stderr=True,
         reg_method='l2',
         alpha=10,
         l1_ratio=0,
         tol=1e-4,
         max_iter=100,
         coef=None,
         stderr=None,
         n_classes=None,
     )
     self.model = CrossEntropyMNL(**options)
     exog = self.data_anes96.exog
     self.model.fit(exog, self.y_disturbed)
     # predict
     matches = self.model.predict(exog) == self.data_anes96.endog
     self.assertEqual(np.sum(matches), 335)
     # loglike/_per_sample
     total_loglike = self.model.loglike(exog, self.y_disturbed)
     self.assertAlmostEqual(total_loglike, -1580.5280532302786, places=3)
Example #33
0
 def test_lr_one_data_point(self):
     # Corner case: fit on a single spector row (index 4) with alpha=0 and a
     # sample weight of 0.5. The test expects all-zero coefficients of shape
     # (4, 1) and per-sample log-likelihoods of 0 or -inf depending on the
     # target passed in.
     self.model = CrossEntropyMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None, n_classes=None)
     self.model.fit(self.data_spector.exog[4:5, :],
                    self.y[4:5, ], sample_weight=0.5)
     # coef
     np.testing.assert_array_equal(
         self.model.coef,
         np.zeros((4, 1)))
     # loglike_per_sample
     # np.inf replaces the np.Infinity alias, which NumPy 2.0 removed.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], np.array([1, 0]).reshape(-1, 1)),
         np.array([0, -np.inf]), decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], np.array([1, 1]).reshape(-1, 1)),
         np.array([0, 0]), decimal=3)
Example #34
0
    def test_lr(self):
        # Fit on the spector data with alpha=0. The test expects all-zero
        # coefficients of shape (4, 1), an all-zero prediction vector, and
        # per-sample log-likelihoods of 0 for target 1 and -inf for target 0.
        self.model = CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        np.testing.assert_array_equal(
            self.model.coef,
            np.zeros((4, 1)))

        # predict
        np.testing.assert_array_equal(
            self.model.predict(self.data_spector.exog),
            np.array([0] * self.data_spector.endog.shape[0]))
        # loglike/_per_sample
        # np.inf replaces the np.Infinity alias, which NumPy 2.0 removed.
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(self.data_spector.exog,
                                          np.array([1] * 16 + [0] * 16).reshape(-1, 1)),
            np.array([0] * 16 + [-np.inf] * 16))
Example #35
0
 def test_lr_disturbed_three_data_point(self):
     # Corner case: fit on only three anes96 rows (6..8) with an L2 penalty
     # (alpha=.1) and a uniform sample weight of 0.5, then check the
     # coefficient shape and per-sample log-likelihoods for several targets.
     self.model = CrossEntropyMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2',  alpha=.1, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None, n_classes=None)
     self.model.fit(self.data_anes96.exog[6:9, :],
                    self.y_disturbed[6:9, ], sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (7, 6))
     # loglike_per_sample
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :], self.y_disturbed[6:9, ]),
         np.array([-0.336, -0.389, -0.398]), decimal=3)
     # `classes` is passed by keyword: it is keyword-only in current
     # scikit-learn releases, and the keyword form also works on old ones.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :],
         label_binarize([3, 1, 4], classes=list(range(7)))),
         np.array([-3.415, -4.506, -2.367]), decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_anes96.exog[6:9, :],
         label_binarize([3, 0, 5], classes=list(range(7)))),
         np.array([-3.415, -4.492, -4.301]), decimal=3)
Example #36
0
 def test_lr_disturbed_two_data_point(self):
     # Corner case: fit on two spector rows (4 and 5) of the disturbed
     # targets with an L2 penalty (alpha=.1) and a sample weight of 0.5,
     # then check the coefficient shape and per-sample log-likelihoods.
     self.model = CrossEntropyMNL(
         solver='lbfgs', fit_intercept=True, est_stderr=True,
         reg_method='l2',  alpha=.1, l1_ratio=0,  tol=1e-4, max_iter=100,
         coef=None, stderr=None, n_classes=None)
     self.model.fit(self.data_spector.exog[4:6, :],
                    self.y_disturbed[4:6, ], sample_weight=0.5)
     # coef
     self.assertEqual(self.model.coef.shape, (1, 4))
     # loglike_per_sample
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :], self.y_disturbed[4:6, ]),
         np.array([-0.503, -0.662]), decimal=3)
     # loglike_per_sample: an all-zero target row is expected to give -inf.
     # np.inf replaces the np.Infinity alias, which NumPy 2.0 removed.
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :],
         np.array([[0, 0], [0.99, 0.01]])),
         np.array([-np.inf, -0.662]), decimal=3)
     np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
         self.data_spector.exog[4:6, :],
         np.array([[0, 0], [0.01, 0.99]])),
         np.array([-np.inf, -0.725]), decimal=3)
Example #37
0
    def test_lr_regularized(self):
        # lbfgs fit on the spector data with an L2 penalty (alpha=.01);
        # checks coefficients, predictions and the total log-likelihood.
        settings = dict(
            solver='lbfgs',
            fit_intercept=True,
            est_stderr=True,
            reg_method='l2',
            alpha=.01,
            l1_ratio=0,
            tol=1e-4,
            max_iter=100,
            coef=None,
            stderr=None,
            n_classes=None,
        )
        self.model = CrossEntropyMNL(**settings)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y)

        # coefficient
        expected_coef = np.array([[-10.66, 2.364, 0.064, 2.142]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array([
            0., 0., 0., 0., 1., 0., 0., 0.,
            0., 1., 0., 0., 0., 0., 0., 0.,
            0., 0., 1., 1., 0., 1., 0., 1.,
            1., 0., 1., 0., 1., 1., 1., 0.,
        ])
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -13.016861222748515, places=3)
Example #38
0
    def test_lr_disturbed_sample_weight_all_half(self):
        # Fit on the disturbed targets with a uniform sample weight of .5 and
        # check coefficients, predictions and the weighted log-likelihood
        # (compared with half of the reference value -13.366...).
        self.model = CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2',  alpha=0, l1_ratio=0,  tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model.fit(self.data_spector.exog, self.y_disturbed, sample_weight=.5)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-12.327,  2.686,  0.089,  2.258]]),
            decimal=3)

        # predict
        np.testing.assert_array_almost_equal(
            self.model.predict(self.data_spector.exog),
            np.array((0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
                      0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,  1.,  1.,  0.,
                      1.,  0.,  1.,  1.,  1.,  0.)),
            decimal=3)
        # loglike/_per_sample
        # Plain true division: both operands are floats, so the Python 2
        # compatibility shim old_div() is not needed here.
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y_disturbed, sample_weight=.5),
            -13.366314173353134 / 2.,
            places=3)
class CrossEntropyMNLBinaryTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Shared fixtures: the statsmodels spector dataset plus two 32x2
        # target matrices built from the same per-row label vector.
        cls.data_spector = sm.datasets.spector.load()
        # One entry per row; 1 marks the rows whose second column is "on".
        second_column_on = [
            0, 0, 0, 0, 1, 0, 0, 0,
            0, 1, 0, 0, 0, 1, 0, 0,
            0, 0, 0, 1, 0, 1, 0, 0,
            1, 1, 1, 0, 1, 1, 0, 1,
        ]
        # Hard targets: [1, 0] for label 0 and [0, 1] for label 1.
        cls.y = np.array([[1 - flag, flag] for flag in second_column_on])
        # Disturbed targets: same pattern with 0.99 / 0.01 instead of 1 / 0.
        cls.y_disturbed = np.array(
            [[0.01, 0.99] if flag else [0.99, 0.01]
             for flag in second_column_on])

    def test_lr(self):
        # Fit on the spector data with lbfgs and alpha=0, then check the
        # coefficients, predictions, log-likelihood and the JSON round trip.
        settings = {
            'solver': 'lbfgs',
            'fit_intercept': True,
            'est_stderr': True,
            'reg_method': 'l2',
            'alpha': 0,
            'l1_ratio': 0,
            'tol': 1e-4,
            'max_iter': 100,
            'coef': None,
            'stderr': None,
            'n_classes': None,
        }
        self.model = CrossEntropyMNL(**settings)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y)

        # coefficient
        expected_coef = np.array([[-13.021, 2.8261, .09515, 2.378]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array([
            0., 0., 0., 0., 1., 0., 0., 0.,
            0., 1., 0., 0., 0., 0., 0., 0.,
            0., 0., 1., 1., 0., 1., 0., 1.,
            1., 0., 1., 0., 1., 1., 1., 0.,
        ])
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -12.8896334653335, places=3)

        # to_json
        json_dict = self.model.to_json(
            './tests/linear_models/CrossentropyMNL/Binary/')
        self.assertEqual(json_dict['properties']['solver'], 'lbfgs')

        # from_json: the rebuilt model must carry the same coefficients
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array([0, 1]), decimal=3)
        self.assertEqual(self.model.n_classes, 2)

    def test_lr_disturbed(self):
        # lbfgs fit with alpha=0 on the disturbed (0.99 / 0.01) targets;
        # checks coefficients, predictions and the total log-likelihood.
        settings = dict(
            solver='lbfgs',
            fit_intercept=True,
            est_stderr=True,
            reg_method='l2',
            alpha=0,
            l1_ratio=0,
            tol=1e-4,
            max_iter=100,
            coef=None,
            stderr=None,
            n_classes=None,
        )
        self.model = CrossEntropyMNL(**settings)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y_disturbed)

        # coefficient
        expected_coef = np.array([[-12.327, 2.686, 0.089, 2.258]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array([
            0., 0., 0., 0., 1., 0., 0., 0.,
            0., 1., 0., 0., 0., 0., 0., 0.,
            0., 0., 1., 1., 0., 1., 0., 1.,
            1., 0., 1., 0., 1., 1., 1., 0.,
        ])
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y_disturbed)
        self.assertAlmostEqual(total_loglike, -13.366314173353134, places=3)

    def test_lr_regularized(self):
        # lbfgs fit on the spector data with an L2 penalty (alpha=.01);
        # checks coefficients, predictions and the total log-likelihood.
        settings = {
            'solver': 'lbfgs',
            'fit_intercept': True,
            'est_stderr': True,
            'reg_method': 'l2',
            'alpha': .01,
            'l1_ratio': 0,
            'tol': 1e-4,
            'max_iter': 100,
            'coef': None,
            'stderr': None,
            'n_classes': None,
        }
        self.model = CrossEntropyMNL(**settings)
        exog = self.data_spector.exog
        self.model.fit(exog, self.y)

        # coefficient
        expected_coef = np.array([[-10.66, 2.364, 0.064, 2.142]])
        np.testing.assert_array_almost_equal(
            self.model.coef, expected_coef, decimal=3)

        # predict
        expected_labels = np.array([
            0., 0., 0., 0., 1., 0., 0., 0.,
            0., 1., 0., 0., 0., 0., 0., 0.,
            0., 0., 1., 1., 0., 1., 0., 1.,
            1., 0., 1., 0., 1., 1., 1., 0.,
        ])
        np.testing.assert_array_almost_equal(
            self.model.predict(exog), expected_labels, decimal=3)

        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -13.016861222748515, places=3)

    def test_lr_sample_weight_all_half(self):
        # Fit with a uniform sample weight of .5: the coefficients are
        # compared with the same reference values as in test_lr, and the
        # weighted log-likelihood with half of test_lr's reference value.
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=0,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-13.021, 2.8261, .09515, 2.378]]),
            decimal=3)
        # loglike/_per_sample
        # Plain true division: both operands are floats, so the Python 2
        # compatibility shim old_div() is not needed here.
        self.assertAlmostEqual(self.model.loglike(self.data_spector.exog,
                                                  self.y,
                                                  sample_weight=.5),
                               -12.8896334653335 / 2.,
                               places=3)

    def test_lr_disturbed_sample_weight_all_half(self):
        # Fit on the disturbed targets with a uniform sample weight of .5:
        # coefficients and predictions are compared with the same reference
        # values as in test_lr_disturbed, and the weighted log-likelihood
        # with half of that test's reference value.
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=0,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(self.data_spector.exog,
                       self.y_disturbed,
                       sample_weight=.5)
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef,
            np.array([[-12.327, 2.686, 0.089, 2.258]]),
            decimal=3)

        # predict
        np.testing.assert_array_almost_equal(
            self.model.predict(self.data_spector.exog),
            np.array((0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
                      0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0.,
                      1., 1., 1., 0.)),
            decimal=3)
        # loglike/_per_sample
        # Plain true division: both operands are floats, so the Python 2
        # compatibility shim old_div() is not needed here.
        self.assertAlmostEqual(self.model.loglike(self.data_spector.exog,
                                                  self.y_disturbed,
                                                  sample_weight=.5),
                               -13.366314173353134 / 2.,
                               places=3)

    def test_lr_sample_weight_all_zero(self):
        # Fitting with a scalar sample weight of 0 is expected to raise
        # ValueError.
        settings = dict(
            solver='lbfgs',
            fit_intercept=True,
            est_stderr=True,
            reg_method='l2',
            alpha=0,
            l1_ratio=0,
            tol=1e-4,
            max_iter=100,
            coef=None,
            stderr=None,
            n_classes=None,
        )
        self.model = CrossEntropyMNL(**settings)
        features = self.data_spector.exog
        targets = self.y
        with self.assertRaises(ValueError):
            # third positional argument is the sample weight
            self.model.fit(features, targets, 0)

    def test_lr_sample_weight_half_zero_half_one(self):
        # Weighting the first len_half rows by 1 and the remaining rows by 0
        # must give the same coefficients (to 3 decimals) as fitting a second
        # model on just those first rows.
        settings = dict(
            solver='lbfgs',
            fit_intercept=True,
            est_stderr=True,
            reg_method='l2',
            alpha=0,
            l1_ratio=0,
            tol=1e-4,
            max_iter=100,
            coef=None,
            stderr=None,
            n_classes=None,
        )
        self.model = CrossEntropyMNL(**settings)
        len_half = 8
        n_zero_weight = self.y.shape[0] - len_half
        weights = np.array([1] * len_half + [0] * n_zero_weight)
        self.model.fit(self.data_spector.exog, self.y, sample_weight=weights)

        # reference model trained only on the rows that carried weight 1
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(self.data_spector.exog[:len_half],
                            self.y[:len_half])

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

    def test_lr_disturbed_sample_weight_half_zero_half_one(self):
        # Same check as the 0/1 weight test, on the disturbed targets: a 0/1
        # weighted fit must match (to 3 decimals) a fit restricted to the
        # rows that carried weight 1.
        settings = {
            'solver': 'lbfgs',
            'fit_intercept': True,
            'est_stderr': True,
            'reg_method': 'l2',
            'alpha': 0,
            'l1_ratio': 0,
            'tol': 1e-4,
            'max_iter': 100,
            'coef': None,
            'stderr': None,
            'n_classes': None,
        }
        self.model = CrossEntropyMNL(**settings)
        len_half = 8
        n_zero_weight = self.y_disturbed.shape[0] - len_half
        weights = np.array([1] * len_half + [0] * n_zero_weight)
        self.model.fit(self.data_spector.exog,
                       self.y_disturbed,
                       sample_weight=weights)

        # reference model trained only on the rows that carried weight 1
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(self.data_spector.exog[:len_half],
                            self.y_disturbed[:len_half])

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

    # corner cases
    def test_lr_two_data_point(self):
        # Corner case: fit on two spector rows (4 and 5) with an L2 penalty
        # (alpha=.1) and a sample weight of 0.5, then check the coefficient
        # shape and per-sample log-likelihoods for several targets.
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=.1,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(self.data_spector.exog[4:6, :],
                       self.y[4:6, ],
                       sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (1, 4))
        # loglike_per_sample
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], self.y[4:6, ]),
                                             np.array([-0.495, -0.661]),
                                             decimal=3)
        # loglike_per_sample: an all-zero target row is expected to give -inf.
        # np.inf replaces the np.Infinity alias, which NumPy 2.0 removed.
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], np.array([[0, 0], [1, 0]])),
                                             np.array([-np.inf, -0.661]),
                                             decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], np.array([[0, 0], [0, 1]])),
                                             np.array([-np.inf, -0.726]),
                                             decimal=3)

    def test_lr_disturbed_two_data_point(self):
        # Corner case: fit on two spector rows (4 and 5) of the disturbed
        # targets with an L2 penalty (alpha=.1) and a sample weight of 0.5,
        # then check the coefficient shape and per-sample log-likelihoods.
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=.1,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        self.model.fit(self.data_spector.exog[4:6, :],
                       self.y_disturbed[4:6, ],
                       sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (1, 4))
        # loglike_per_sample
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], self.y_disturbed[4:6, ]),
                                             np.array([-0.503, -0.662]),
                                             decimal=3)
        # loglike_per_sample: an all-zero target row is expected to give -inf.
        # np.inf replaces the np.Infinity alias, which NumPy 2.0 removed.
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], np.array([[0, 0], [0.99, 0.01]])),
                                             np.array([-np.inf, -0.662]),
                                             decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_spector.exog[4:6, :], np.array([[0, 0], [0.01, 0.99]])),
                                             np.array([-np.inf, -0.725]),
                                             decimal=3)

    def test_lr_multicolinearty(self):
        """Compare a fit on a duplicated column with a fit on the single column.

        ``model_col`` is trained on the first Spector regressor stacked next
        to a copy of itself, ``model`` on that regressor alone; both use
        ``alpha=0`` and a scalar ``sample_weight`` of 0.5.  The duplicated fit
        must reproduce the reference coefficients, and the two models must
        agree on per-sample log-likelihoods and predictions.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        first_col = self.data_spector.exog[:, 0:1]
        X = np.hstack([first_col, first_col])

        # model on the perfectly collinear design
        self.model_col = CrossEntropyMNL(**settings)
        self.model_col.fit(X, self.y, sample_weight=0.5)
        # reference model on the single column
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(first_col, self.y, sample_weight=0.5)

        expected_coef = np.array([[-9.703, 1.42002783, 1.42002783]])
        np.testing.assert_array_almost_equal(self.model_col.coef,
                                             expected_coef,
                                             decimal=3)
        # loglike_per_sample
        loglike_col = self.model_col.loglike_per_sample(X, self.y)
        loglike_ref = self.model.loglike_per_sample(first_col, self.y)
        np.testing.assert_array_almost_equal(loglike_col,
                                             loglike_ref,
                                             decimal=3)
        # predict
        predicted_col = self.model_col.predict(X)
        predicted_ref = self.model.predict(first_col)
        np.testing.assert_array_almost_equal(predicted_col,
                                             predicted_ref,
                                             decimal=3)

    def test_lr_disturbed_multicolinearty(self):
        """Collinearity check using the smoothed targets ``y_disturbed``.

        ``model_col`` is trained on the first Spector regressor stacked next
        to a copy of itself, ``model`` on that regressor alone; both use
        ``alpha=0`` and a scalar ``sample_weight`` of 0.5.  The duplicated fit
        must reproduce the reference coefficients, and the two models must
        agree on per-sample log-likelihoods and predictions.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        first_col = self.data_spector.exog[:, 0:1]
        X = np.hstack([first_col, first_col])

        # model on the perfectly collinear design
        self.model_col = CrossEntropyMNL(**settings)
        self.model_col.fit(X, self.y_disturbed, sample_weight=0.5)
        # reference model on the single column
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(first_col, self.y_disturbed, sample_weight=0.5)

        expected_coef = np.array([[-9.359, 1.37, 1.37]])
        np.testing.assert_array_almost_equal(self.model_col.coef,
                                             expected_coef,
                                             decimal=3)
        # loglike_per_sample
        loglike_col = self.model_col.loglike_per_sample(X, self.y_disturbed)
        loglike_ref = self.model.loglike_per_sample(first_col,
                                                    self.y_disturbed)
        np.testing.assert_array_almost_equal(loglike_col,
                                             loglike_ref,
                                             decimal=3)
        # predict
        predicted_col = self.model_col.predict(X)
        predicted_ref = self.model.predict(first_col)
        np.testing.assert_array_almost_equal(predicted_col,
                                             predicted_ref,
                                             decimal=3)
class CrossEntropyMNLMultinomialTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Load the ANES96 data once and build the two target matrices.

        ``cls.y`` is ``endog`` binarized against the classes 0..6;
        ``cls.y_disturbed`` is a smoothed variant of it.
        """
        anes96 = sm.datasets.anes96.load()
        class_labels = list(range(7))
        binarized = label_binarize(anes96.endog, classes=class_labels)
        cls.data_anes96 = anes96
        cls.y = binarized
        # (y + 0.01) / 1.07: assuming one column per listed class, a one-hot
        # row over the 7 classes still sums to 1 after this shift.
        cls.y_disturbed = old_div((binarized + 0.01), 1.07)

    def test_label_encoder(self):
        """Check ``_label_encoder`` on three samples with three classes.

        For both cases the expected output repeats every input row three
        times, cycles the encoded labels through 0, 1, 2 for each sample, and
        returns weights equal to the sample weight multiplied by the matching
        entry of ``y``.
        """
        cases = (
            # one-hot targets, unit sample weights
            (np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]),
             np.ones(3),
             np.array([1, 0, 0, 0, 1, 0, 0, 0, 1])),
            # with sample_weight
            (np.array([[0.5, 0.25, 0.25],
                       [0.25, 0.5, 0.25],
                       [0.25, 0.25, 0.5]]),
             np.array([0.25, 0.5, 0.25]),
             np.array([0.125, 0.0625, 0.0625,
                       0.125, 0.25, 0.125,
                       0.0625, 0.0625, 0.125])),
        )
        for y, sample_weight, expected_weight in cases:
            x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
            # Expected values are built before the call so they cannot be
            # affected by anything the encoder does to its inputs.
            expected_X = np.repeat(x, 3, axis=0)
            expected_Y = np.tile(np.array([0, 1, 2]), 3)
            X_repeated, Y_repeated, sample_weight_repeated = \
                CrossEntropyMNL._label_encoder(x, y, sample_weight)
            np.testing.assert_array_equal(X_repeated, expected_X)
            np.testing.assert_array_equal(Y_repeated, expected_Y)
            np.testing.assert_array_equal(sample_weight_repeated,
                                          expected_weight)

    def test_lr(self):
        """Fit on the one-hot targets with ``alpha=10`` and verify the outputs.

        Checks the number of correctly predicted labels, the total
        log-likelihood, the solver recorded by ``to_json``, that a model
        rebuilt with ``from_json`` carries the same coefficients, and the
        ``classes`` / ``n_classes`` attributes of the fitted model.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=10,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)
        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 333)
        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -1540.888458338286, places=3)
        # to_json
        json_dict = self.model.to_json(
            './tests/linear_models/CrossentropyMNL/Multinomial/')
        self.assertEqual(json_dict['properties']['solver'], 'newton-cg')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_from_json.coef,
                                             decimal=3)
        expected_classes = np.array(list(range(7)))
        np.testing.assert_array_almost_equal(self.model.classes,
                                             expected_classes,
                                             decimal=3)
        self.assertEqual(self.model.n_classes, 7)

    def test_lr_disturbed(self):
        """Fit on the smoothed targets with ``alpha=10`` and verify the outputs.

        Checks the number of predictions equal to ``endog`` and the total
        log-likelihood evaluated against ``y_disturbed``.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=10,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y_disturbed)
        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 335)
        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y_disturbed)
        self.assertAlmostEqual(total_loglike, -1580.5280532302786, places=3)

    def test_lr_regularized(self):
        """Fit on the one-hot targets with ``alpha=.5`` and verify the outputs.

        Checks the number of predictions equal to ``endog`` and the total
        log-likelihood evaluated against ``y``.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=.5,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)
        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 369)
        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_loglike, -1466.9886103092626, places=3)

    def test_lr_disturbed_regularized(self):
        """Fit on the smoothed targets with ``alpha=.5`` and verify the outputs.

        Checks the number of predictions equal to ``endog`` and the total
        log-likelihood evaluated against ``y_disturbed``.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=.5,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y_disturbed)
        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 366)
        # loglike/_per_sample
        total_loglike = self.model.loglike(exog, self.y_disturbed)
        self.assertAlmostEqual(total_loglike, -1519.9521131193064, places=3)

    def test_lr_sample_weight_all_half(self):
        """A uniform sample weight of 0.5 must not change the fitted coefficients.

        ``model_half`` is fitted with ``sample_weight=.5`` and ``model``
        without weights, both on the one-hot targets with ``alpha=0``.  Their
        coefficients must agree; the weighted log-likelihood of ``model`` is
        compared with half of the reference value.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=0,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        # weighted fit first, then the unweighted reference
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(exog, self.y, sample_weight=.5)
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y)
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)
        # predict
        n_correct = np.sum(
            self.model_half.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 372)
        # loglike/_per_sample
        weighted_loglike = self.model.loglike(exog, self.y, sample_weight=.5)
        self.assertAlmostEqual(weighted_loglike,
                               old_div(-1461.92274725, 2.),
                               places=3)

    def test_lr_disturbed_sample_weight_all_half(self):
        """Uniform 0.5 sample weights on the smoothed targets ``y_disturbed``.

        ``model_half`` is fitted with ``sample_weight=.5`` and ``model``
        without weights, both with ``alpha=0``.  Their coefficients must
        agree; the weighted log-likelihood of ``model`` is compared with half
        of the reference value.
        """
        settings = dict(solver='newton-cg', fit_intercept=True,
                        est_stderr=True, reg_method='l2', alpha=0,
                        l1_ratio=0, tol=1e-4, max_iter=100, coef=None,
                        stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        # weighted fit first, then the unweighted reference
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(exog, self.y_disturbed, sample_weight=.5)
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y_disturbed)
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)
        # predict
        n_correct = np.sum(
            self.model_half.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 367)
        # loglike/_per_sample
        weighted_loglike = self.model.loglike(exog,
                                              self.y_disturbed,
                                              sample_weight=.5)
        self.assertAlmostEqual(weighted_loglike,
                               old_div(-1516.50148, 2.),
                               places=3)

    def test_lr_sample_weight_all_zero(self):
        """Fitting with a scalar sample weight of 0 must raise ``ValueError``."""
        # NOTE(review): the enclosing suite is named for CrossEntropyMNL, yet
        # this test builds a DiscreteMNL (with ``classes`` instead of
        # ``n_classes``) and feeds it ``y_disturbed`` - confirm this is the
        # intended model under test.
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, classes=None)
        self.model = DiscreteMNL(**settings)
        # Resolve the arguments up front so that only the fit call itself is
        # covered by the exception check.
        fit = self.model.fit
        exog = self.data_anes96.exog
        targets = self.y_disturbed
        with self.assertRaises(ValueError):
            fit(exog, targets, 0)

    def test_lr_sample_weight_half_zero_half_one(self):
        """Zero-weighted rows must have no influence on the fit.

        ``model`` sees every row, with weight 1 on the first 500 and weight 0
        on the remainder; ``model_half`` sees only the first 500 rows.  The
        two coefficient matrices must agree to three decimals.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        len_half = 500
        n_rest = exog.shape[0] - len_half
        zero_one_weights = np.array([1] * len_half + [0] * n_rest)

        # full data, second part switched off through the weights
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y, sample_weight=zero_one_weights)
        # truncated data, no weights
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(exog[:len_half], self.y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)

    def test_lr_disturbed_sample_weight_half_zero_half_one(self):
        """Zero-weighted rows must have no influence on a smoothed-target fit.

        ``model`` sees every row of ``y_disturbed``, with weight 1 on the
        first 500 and weight 0 on the remainder; ``model_half`` sees only the
        first 500 rows.  The two coefficient matrices must agree to three
        decimals.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        exog = self.data_anes96.exog
        len_half = 500
        n_rest = exog.shape[0] - len_half
        zero_one_weights = np.array([1] * len_half + [0] * n_rest)

        # full data, second part switched off through the weights
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(exog, self.y_disturbed, sample_weight=zero_one_weights)
        # truncated data, no weights
        self.model_half = CrossEntropyMNL(**settings)
        self.model_half.fit(exog[:len_half], self.y_disturbed[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(self.model.coef,
                                             self.model_half.coef,
                                             decimal=3)

    # corner cases
    def test_lr_three_data_point(self):
        """Fit on three one-hot samples and check per-sample log-likelihoods.

        The model (``reg_method='l2'``, ``alpha=.1``) is fitted on rows 6-8 of
        the ANES96 data with a scalar ``sample_weight`` of 0.5.  The fitted
        coefficient matrix must have shape ``(7, 6)``, and
        ``loglike_per_sample`` must match the reference values for the
        training targets and for two alternative label assignments.
        """
        # with regularization
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=.1,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        exog_rows = self.data_anes96.exog[6:9, :]
        target_rows = self.y[6:9, ]
        self.model.fit(exog_rows, target_rows, sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (7, 6))
        # loglike_per_sample on the training targets
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(exog_rows, target_rows),
            np.array([-0.015, -0.091, -0.095]),
            decimal=3)
        # loglike_per_sample on other labels.  ``classes`` is passed by
        # keyword, as setUpClass does: current scikit-learn releases reject
        # it as a positional argument.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                exog_rows,
                label_binarize([3, 1, 4], classes=list(range(7)))),
            np.array([-4.201, -5.094, -2.825]),
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                exog_rows,
                label_binarize([3, 0, 5], classes=list(range(7)))),
            np.array([-4.201, -7.352, -8.957]),
            decimal=3)

    def test_lr_disturbed_three_data_point(self):
        """Fit on three smoothed-label samples and check per-sample log-likelihoods.

        The model (``reg_method='l2'``, ``alpha=.1``) is fitted on rows 6-8 of
        the ANES96 data, using ``y_disturbed`` as targets and a scalar
        ``sample_weight`` of 0.5.  The fitted coefficient matrix must have
        shape ``(7, 6)``, and ``loglike_per_sample`` must match the reference
        values for the training targets and for two one-hot label
        assignments.
        """
        # with regularization
        self.model = CrossEntropyMNL(solver='lbfgs',
                                     fit_intercept=True,
                                     est_stderr=True,
                                     reg_method='l2',
                                     alpha=.1,
                                     l1_ratio=0,
                                     tol=1e-4,
                                     max_iter=100,
                                     coef=None,
                                     stderr=None,
                                     n_classes=None)
        exog_rows = self.data_anes96.exog[6:9, :]
        target_rows = self.y_disturbed[6:9, ]
        self.model.fit(exog_rows, target_rows, sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (7, 6))
        # loglike_per_sample on the training targets
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(exog_rows, target_rows),
            np.array([-0.336, -0.389, -0.398]),
            decimal=3)
        # loglike_per_sample on one-hot labels.  ``classes`` is passed by
        # keyword, as setUpClass does: current scikit-learn releases reject
        # it as a positional argument.
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                exog_rows,
                label_binarize([3, 1, 4], classes=list(range(7)))),
            np.array([-3.415, -4.506, -2.367]),
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                exog_rows,
                label_binarize([3, 0, 5], classes=list(range(7)))),
            np.array([-3.415, -4.492, -4.301]),
            decimal=3)

    def test_lr_multicolinearty(self):
        """Compare a fit on a duplicated column with a fit on the single column.

        ``model_col`` is trained on the first ANES96 regressor stacked next to
        a copy of itself, ``model`` on that regressor alone; both use
        ``alpha=0`` and a scalar ``sample_weight`` of 0.5.  The two models
        must agree on per-sample log-likelihoods and on predictions.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        first_col = self.data_anes96.exog[:, 0:1]
        X = np.hstack([first_col, first_col])

        # model on the perfectly collinear design
        self.model_col = CrossEntropyMNL(**settings)
        self.model_col.fit(X, self.y, sample_weight=0.5)
        # reference model on the single column
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(first_col, self.y, sample_weight=0.5)

        # loglike_per_sample
        loglike_col = self.model_col.loglike_per_sample(X, self.y)
        loglike_ref = self.model.loglike_per_sample(first_col, self.y)
        np.testing.assert_array_almost_equal(loglike_col,
                                             loglike_ref,
                                             decimal=3)
        # predict
        predicted_col = self.model_col.predict(X)
        predicted_ref = self.model.predict(first_col)
        np.testing.assert_array_almost_equal(predicted_col,
                                             predicted_ref,
                                             decimal=3)

    def test_lr_disturbed_multicolinearty(self):
        """Collinearity check using the smoothed targets ``y_disturbed``.

        ``model_col`` is trained on the first ANES96 regressor stacked next to
        a copy of itself, ``model`` on that regressor alone; both use
        ``alpha=0`` and a scalar ``sample_weight`` of 0.5.  The two models
        must agree on per-sample log-likelihoods and on predictions.
        """
        settings = dict(solver='lbfgs', fit_intercept=True, est_stderr=True,
                        reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4,
                        max_iter=100, coef=None, stderr=None, n_classes=None)
        first_col = self.data_anes96.exog[:, 0:1]
        X = np.hstack([first_col, first_col])

        # model on the perfectly collinear design
        self.model_col = CrossEntropyMNL(**settings)
        self.model_col.fit(X, self.y_disturbed, sample_weight=0.5)
        # reference model on the single column
        self.model = CrossEntropyMNL(**settings)
        self.model.fit(first_col, self.y_disturbed, sample_weight=0.5)

        # loglike_per_sample
        loglike_col = self.model_col.loglike_per_sample(X, self.y_disturbed)
        loglike_ref = self.model.loglike_per_sample(first_col,
                                                    self.y_disturbed)
        np.testing.assert_array_almost_equal(loglike_col,
                                             loglike_ref,
                                             decimal=3)
        # predict
        predicted_col = self.model_col.predict(X)
        predicted_ref = self.model.predict(first_col)
        np.testing.assert_array_almost_equal(predicted_col,
                                             predicted_ref,
                                             decimal=3)
    def test_train_multivariate(self):
        """Train a two-state ``UnSupervisedIOHMM`` with two emission models.

        The outputs are ``'rt'`` (``OLS``, no covariates) and ``'corr'``
        (``DiscreteMNL``, covariate ``'Pacc'``).  After training, the test
        checks emission coefficients and dispersions, the column sums of the
        transition probabilities, and the keys of the ``to_json`` result,
        which is finally written to ``model.json``.
        """
        # NOTE(review): ``self.data_speed`` is not assigned by the setUpClass
        # of the class this method sits in - confirm where it should be set.
        iohmm = UnSupervisedIOHMM(num_states=2,
                                  max_EM_iter=100,
                                  EM_tol=1e-6)
        self.model = iohmm
        iohmm.set_models(
            model_initial=CrossEntropyMNL(solver='newton-cg', reg_method='l2'),
            model_transition=CrossEntropyMNL(solver='newton-cg',
                                             reg_method='l2'),
            model_emissions=[OLS(), DiscreteMNL(reg_method='l2')])
        iohmm.set_inputs(covariates_initial=[],
                         covariates_transition=[],
                         covariates_emissions=[[], ['Pacc']])
        iohmm.set_outputs([['rt'], ['corr']])
        iohmm.set_data([self.data_speed])
        iohmm.train()

        # emission coefficients
        # (model_emissions is presumably indexed [state][emission] - verify)
        for idx, expected_coef in ((0, 5.5), (1, 6.4)):
            np.testing.assert_array_almost_equal(
                iohmm.model_emissions[idx][0].coef,
                np.array([[expected_coef]]),
                decimal=1)

        # emission dispersion
        for idx, expected_dispersion in ((0, 0.036), (1, 0.063)):
            np.testing.assert_array_almost_equal(
                iohmm.model_emissions[idx][0].dispersion,
                np.array([[expected_dispersion]]),
                decimal=2)

        # transition
        expected_column_sums = (np.array([387, 51]), np.array([37, 401.]))
        for idx, expected_sums in enumerate(expected_column_sums):
            log_proba = iohmm.model_transition[idx].predict_log_proba(
                iohmm.inp_transitions_all_sequences)
            np.testing.assert_array_almost_equal(
                np.exp(log_proba).sum(axis=0),
                expected_sums,
                decimal=0)

        # to_json
        json_dict = iohmm.to_json('tests/IOHMM_models/UnSupervisedIOHMM/')
        self.assertEqual(json_dict['data_type'], 'UnSupervisedIOHMM')
        expected_keys = {
            'num_states', 'EM_tol', 'max_EM_iter', 'covariates_initial',
            'covariates_transition', 'covariates_emissions',
            'responses_emissions', 'model_initial', 'model_transition',
            'model_emissions',
        }
        self.assertSetEqual(set(json_dict['properties'].keys()),
                            expected_keys)
        model_path = 'tests/IOHMM_models/UnSupervisedIOHMM/model.json'
        with open(model_path, 'w') as outfile:
            json.dump(json_dict, outfile, indent=4, sort_keys=True)
class CrossEntropyMNLUnaryTests(unittest.TestCase):
    """CrossEntropyMNL checks for a target that has a single class column.

    Every training label row is ``[1]``; each fit below is asserted to leave
    the coefficients at exactly zero with shape (4, 1).
    """

    @classmethod
    def setUpClass(cls):
        """Load the spector dataset and build an all-ones, one-column target."""
        cls.data_spector = sm.datasets.spector.load()
        cls.y = np.ones((cls.data_spector.endog.shape[0], 1))

    @staticmethod
    def _make_model():
        """Return a fresh, unfitted CrossEntropyMNL with the shared settings."""
        return CrossEntropyMNL(solver='lbfgs',
                               fit_intercept=True,
                               est_stderr=True,
                               reg_method='l2',
                               alpha=0,
                               l1_ratio=0,
                               tol=1e-4,
                               max_iter=100,
                               coef=None,
                               stderr=None,
                               n_classes=None)

    def test_label_encoder(self):
        """_label_encoder returns X and the weights unchanged and Y as zeros."""
        uniform = np.ones(3)
        uneven = np.array([0.25, 0.5, 0.25])
        # (weights passed in, weights expected back)
        cases = ((uniform, np.array([1, 1, 1])), (uneven, uneven))
        for sample_weight, expected_weight in cases:
            x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
            y = np.array([[1], [1], [1]])
            X_repeated, Y_repeated, sample_weight_repeated = \
                CrossEntropyMNL._label_encoder(x, y, sample_weight)
            np.testing.assert_array_equal(X_repeated, x)
            np.testing.assert_array_equal(Y_repeated, np.array([0, 0, 0]))
            np.testing.assert_array_equal(sample_weight_repeated,
                                          expected_weight)

    def test_lr(self):
        """Fit on the full data; check coef, predictions and per-sample loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))

        # predict
        n_rows = self.data_spector.endog.shape[0]
        np.testing.assert_array_equal(
            self.model.predict(self.data_spector.exog),
            np.array([0] * n_rows))
        # loglike/_per_sample: label 1 is expected to score 0, label 0 -inf.
        # np.inf is used because the np.Infinity alias no longer exists in
        # NumPy 2.0.
        labels = np.array([1] * 16 + [0] * 16).reshape(-1, 1)
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(self.data_spector.exog, labels),
            np.array([0] * 16 + [-np.inf] * 16))

    def test_lr_sample_weight_all_half(self):
        """A scalar sample_weight of 0.5 still gives zero coef and zero loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
        # coefficient
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))
        # loglike/_per_sample
        self.assertEqual(
            self.model.loglike(self.data_spector.exog,
                               self.y,
                               sample_weight=.5), 0)

    # corner cases
    def test_lr_one_data_point(self):
        """Fit on a single row (alpha=0, like every other test in this class)."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog[4:5, :],
                       self.y[4:5],
                       sample_weight=0.5)
        # coef
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))
        # loglike_per_sample on two rows, only the first of which was fitted
        two_rows = self.data_spector.exog[4:6, :]
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                two_rows, np.array([1, 0]).reshape(-1, 1)),
            np.array([0, -np.inf]),
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                two_rows, np.array([1, 1]).reshape(-1, 1)),
            np.array([0, 0]),
            decimal=3)
    def test_train_no_covariates(self):
        """Train a 4-state SemiSupervisedIOHMM with no covariates and check it.

        Uses ``self.data_speed`` and ``self.states``, which are set up outside
        this method. Checks emission coefficients and dispersions, the
        transition probabilities, and the JSON export, which is also written
        to disk.
        """
        np.random.seed(0)
        self.model = SemiSupervisedIOHMM(num_states=4,
                                         max_EM_iter=100,
                                         EM_tol=1e-10)
        initial = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        transition = CrossEntropyMNL(solver='newton-cg', reg_method='l2')
        self.model.set_models(model_initial=initial,
                              model_transition=transition,
                              model_emissions=[OLS()])
        self.model.set_inputs(covariates_initial=[],
                              covariates_transition=[],
                              covariates_emissions=[[]])
        self.model.set_outputs([['rt']])
        self.model.set_data([[self.data_speed, self.states]])
        self.model.train()

        # emission coefficients, one expected value per state
        for state, expected in enumerate((0, 1, 6.4, 5.5)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].coef,
                np.array([[expected]]),
                decimal=1)

        # emission dispersion, one expected value per state
        for state, expected in enumerate((0, 0, 0.051, 0.032)):
            np.testing.assert_array_almost_equal(
                self.model.model_emissions[state][0].dispersion,
                np.array([[expected]]),
                decimal=2)

        # transition: (expected probability row, decimals) per source state
        expected_transitions = (
            ([[0.4, 0.6, 0, 0]], 1),
            ([[0.19, 0.81, 0, 0]], 2),
            ([[0, 0, 0.93, 0.07]], 2),
            ([[0, 0, 0.11, 0.89]], 2),
        )
        for state, (row, decimals) in enumerate(expected_transitions):
            log_proba = self.model.model_transition[state].predict_log_proba(
                np.array([[]]))
            np.testing.assert_array_almost_equal(np.exp(log_proba),
                                                 np.array(row),
                                                 decimal=decimals)

        # to_json
        json_dict = self.model.to_json(
            'tests/IOHMM_models/SemiSupervisedIOHMM/')
        self.assertEqual(json_dict['data_type'], 'SemiSupervisedIOHMM')
        expected_keys = {
            'num_states', 'EM_tol', 'max_EM_iter', 'covariates_initial',
            'covariates_transition', 'covariates_emissions',
            'responses_emissions', 'model_initial', 'model_transition',
            'model_emissions',
        }
        self.assertSetEqual(set(json_dict['properties']), expected_keys)
        with open('tests/IOHMM_models/SemiSupervisedIOHMM/model.json',
                  'w') as outfile:
            json.dump(json_dict, outfile, indent=4, sort_keys=True)
Example #44
0
class CrossEntropyMNLBinaryTests(unittest.TestCase):
    """CrossEntropyMNL checks on the spector data with a two-column target.

    ``y`` holds hard one-hot rows; ``y_disturbed`` holds the same rows with
    1 replaced by 0.99 and 0 replaced by 0.01.
    """

    # Labels asserted by every test here that checks predict() against fixed
    # values (one entry per label row).
    _EXPECTED_LABELS = (
        0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0.,
        1., 0., 1., 1., 1., 0.)
    # Reference coefficients / log-likelihoods for unregularized full-data fits.
    _COEF_HARD = [[-13.021, 2.8261, .09515, 2.378]]
    _COEF_SOFT = [[-12.327, 2.686, 0.089, 2.258]]
    _LOGLIKE_HARD = -12.8896334653335
    _LOGLIKE_SOFT = -13.366314173353134

    @classmethod
    def setUpClass(cls):
        """Load the spector dataset and build the hard and softened targets."""
        cls.data_spector = sm.datasets.spector.load()
        cls.y = np.array([
            [1, 0], [1, 0], [1, 0], [1, 0],
            [0, 1], [1, 0], [1, 0], [1, 0],
            [1, 0], [0, 1], [1, 0], [1, 0],
            [1, 0], [0, 1], [1, 0], [1, 0],
            [1, 0], [1, 0], [1, 0], [0, 1],
            [1, 0], [0, 1], [1, 0], [1, 0],
            [0, 1], [0, 1], [0, 1], [1, 0],
            [0, 1], [0, 1], [1, 0], [0, 1]])
        # Same rows as ``y`` with 1 -> 0.99 and 0 -> 0.01.
        cls.y_disturbed = np.where(cls.y == 1, 0.99, 0.01)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _make_model(alpha=0):
        """Return a fresh, unfitted CrossEntropyMNL; only ``alpha`` varies."""
        return CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=alpha, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

    def _assert_coef(self, expected):
        """Assert ``self.model.coef`` matches ``expected`` to 3 decimals."""
        np.testing.assert_array_almost_equal(
            self.model.coef, np.array(expected), decimal=3)

    def _assert_expected_predictions(self):
        """Assert ``self.model`` predicts ``_EXPECTED_LABELS`` on the full data."""
        np.testing.assert_array_almost_equal(
            self.model.predict(self.data_spector.exog),
            np.array(self._EXPECTED_LABELS),
            decimal=3)

    def _assert_zero_weight_rows_ignored(self, y, len_half=8):
        """Weights of 1 then 0 must give the same coef as fitting the 1-rows only."""
        weights = np.array([1] * len_half + [0] * (y.shape[0] - len_half))
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, y, sample_weight=weights)
        self.model_half = self._make_model()
        self.model_half.fit(self.data_spector.exog[:len_half], y[:len_half])
        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

    def _assert_two_point_loglike(self, y, cases):
        """Fit rows 4-5 with alpha=.1, then check loglike_per_sample per case.

        ``cases`` is an iterable of (labels, expected per-sample loglike).
        """
        x_pair = self.data_spector.exog[4:6, :]
        self.model = self._make_model(alpha=.1)
        self.model.fit(x_pair, y[4:6], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (1, 4))
        # loglike_per_sample
        for labels, expected in cases:
            np.testing.assert_array_almost_equal(
                self.model.loglike_per_sample(x_pair, labels),
                np.array(expected), decimal=3)

    def _assert_duplicated_column_fit(self, y, expected_coef):
        """Compare a fit on a duplicated first column with a fit on that column once."""
        first_col = self.data_spector.exog[:, 0:1]
        X = np.hstack([first_col, first_col])
        self.model_col = self._make_model()
        self.model_col.fit(X, y, sample_weight=0.5)
        self.model = self._make_model()
        self.model.fit(first_col, y, sample_weight=0.5)

        np.testing.assert_array_almost_equal(
            self.model_col.coef, np.array(expected_coef), decimal=3)
        # loglike_per_sample
        np.testing.assert_array_almost_equal(
            self.model_col.loglike_per_sample(X, y),
            self.model.loglike_per_sample(first_col, y), decimal=3)
        np.testing.assert_array_almost_equal(
            self.model_col.predict(X),
            self.model.predict(first_col), decimal=3)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_lr(self):
        """Hard labels: coef, predictions, loglike and a to_json/from_json round trip."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        self._assert_coef(self._COEF_HARD)
        # predict
        self._assert_expected_predictions()
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y),
            self._LOGLIKE_HARD,
            places=3)
        # to_json
        json_dict = self.model.to_json('./tests/linear_models/CrossentropyMNL/Binary/')
        self.assertEqual(json_dict['properties']['solver'], 'lbfgs')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef,
            self.model_from_json.coef,
            decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.array([0, 1]), decimal=3)
        self.assertEqual(self.model.n_classes, 2)

    def test_lr_disturbed(self):
        """Softened labels: coef, predictions and loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y_disturbed)
        # coefficient
        self._assert_coef(self._COEF_SOFT)
        # predict
        self._assert_expected_predictions()
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y_disturbed),
            self._LOGLIKE_SOFT,
            places=3)

    def test_lr_regularized(self):
        """Hard labels with alpha=.01: coef, predictions and loglike."""
        self.model = self._make_model(alpha=.01)
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        self._assert_coef([[-10.66, 2.364, 0.064, 2.142]])
        # predict
        self._assert_expected_predictions()
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y),
            -13.016861222748515,
            places=3)

    def test_lr_sample_weight_all_half(self):
        """Scalar weight .5: same coef as unweighted, half the loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
        # coefficient
        self._assert_coef(self._COEF_HARD)
        # loglike/_per_sample (float operands, so plain division suffices)
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y, sample_weight=.5),
            self._LOGLIKE_HARD / 2.,
            places=3)

    def test_lr_disturbed_sample_weight_all_half(self):
        """Softened labels with scalar weight .5: same coef, half the loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y_disturbed, sample_weight=.5)
        # coefficient
        self._assert_coef(self._COEF_SOFT)
        # predict
        self._assert_expected_predictions()
        # loglike/_per_sample
        self.assertAlmostEqual(
            self.model.loglike(self.data_spector.exog, self.y_disturbed, sample_weight=.5),
            self._LOGLIKE_SOFT / 2.,
            places=3)

    def test_lr_sample_weight_all_zero(self):
        """Passing 0 as the third positional fit argument must raise ValueError."""
        self.model = self._make_model()
        self.assertRaises(ValueError, self.model.fit,
                          self.data_spector.exog, self.y, 0)

    def test_lr_sample_weight_half_zero_half_one(self):
        """Hard labels: zero-weighted rows do not influence the coefficients."""
        self._assert_zero_weight_rows_ignored(self.y)

    def test_lr_disturbed_sample_weight_half_zero_half_one(self):
        """Softened labels: zero-weighted rows do not influence the coefficients."""
        self._assert_zero_weight_rows_ignored(self.y_disturbed)

    # corner cases
    def test_lr_two_data_point(self):
        """Regularized fit on two hard-labelled rows; all-zero label rows score -inf."""
        self._assert_two_point_loglike(self.y, (
            (self.y[4:6], [-0.495, -0.661]),
            (np.array([[0, 0], [1, 0]]), [-np.inf, -0.661]),
            (np.array([[0, 0], [0, 1]]), [-np.inf, -0.726]),
        ))

    def test_lr_disturbed_two_data_point(self):
        """Regularized fit on two softened rows; all-zero label rows score -inf."""
        self._assert_two_point_loglike(self.y_disturbed, (
            (self.y_disturbed[4:6], [-0.503, -0.662]),
            (np.array([[0, 0], [0.99, 0.01]]), [-np.inf, -0.662]),
            (np.array([[0, 0], [0.01, 0.99]]), [-np.inf, -0.725]),
        ))

    def test_lr_multicolinearty(self):
        """Hard labels: a duplicated column gives equal coefs and matching outputs."""
        self._assert_duplicated_column_fit(
            self.y, [[-9.703, 1.42002783, 1.42002783]])

    def test_lr_disturbed_multicolinearty(self):
        """Softened labels: a duplicated column gives equal coefs and matching outputs."""
        self._assert_duplicated_column_fit(
            self.y_disturbed, [[-9.359, 1.37, 1.37]])
Example #45
0
class CrossEntropyMNLUnaryTests(unittest.TestCase):
    """CrossEntropyMNL checks for a target that has a single class column.

    Every training label row is ``[1]``; each fit below is asserted to leave
    the coefficients at exactly zero with shape (4, 1).
    """

    @classmethod
    def setUpClass(cls):
        """Load the spector dataset and build an all-ones, one-column target."""
        cls.data_spector = sm.datasets.spector.load()
        cls.y = np.ones((cls.data_spector.endog.shape[0], 1))

    @staticmethod
    def _make_model():
        """Return a fresh, unfitted CrossEntropyMNL with the shared settings."""
        return CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

    def _assert_zero_coef(self):
        """Assert the fitted coefficients are exactly a (4, 1) block of zeros."""
        np.testing.assert_array_equal(self.model.coef, np.zeros((4, 1)))

    def test_label_encoder(self):
        """_label_encoder returns X and the weights unchanged and Y as zeros."""
        uniform = np.ones(3)
        uneven = np.array([0.25, 0.5, 0.25])
        # (weights passed in, weights expected back)
        cases = ((uniform, np.array([1, 1, 1])), (uneven, uneven))
        for sample_weight, expected_weight in cases:
            x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
            y = np.array([[1], [1], [1]])
            X_repeated, Y_repeated, sample_weight_repeated = \
                CrossEntropyMNL._label_encoder(x, y, sample_weight)
            np.testing.assert_array_equal(X_repeated, x)
            np.testing.assert_array_equal(
                Y_repeated, np.array([0, 0, 0]))
            np.testing.assert_array_equal(
                sample_weight_repeated, expected_weight)

    def test_lr(self):
        """Fit on the full data; check coef, predictions and per-sample loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y)
        # coefficient
        self._assert_zero_coef()

        # predict
        np.testing.assert_array_equal(
            self.model.predict(self.data_spector.exog),
            np.array([0] * self.data_spector.endog.shape[0]))
        # loglike/_per_sample: label 1 is expected to score 0, label 0 -inf.
        # np.inf is used because the np.Infinity alias no longer exists in
        # NumPy 2.0.
        labels = np.array([1] * 16 + [0] * 16).reshape(-1, 1)
        np.testing.assert_array_equal(
            self.model.loglike_per_sample(self.data_spector.exog, labels),
            np.array([0] * 16 + [-np.inf] * 16))

    def test_lr_sample_weight_all_half(self):
        """A scalar sample_weight of 0.5 still gives zero coef and zero loglike."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog, self.y, sample_weight=.5)
        # coefficient
        self._assert_zero_coef()
        # loglike/_per_sample
        self.assertEqual(
            self.model.loglike(self.data_spector.exog, self.y, sample_weight=.5), 0)

    # corner cases
    def test_lr_one_data_point(self):
        """Fit on a single row (alpha=0, like every other test in this class)."""
        self.model = self._make_model()
        self.model.fit(self.data_spector.exog[4:5, :],
                       self.y[4:5], sample_weight=0.5)
        # coef
        self._assert_zero_coef()
        # loglike_per_sample on two rows, only the first of which was fitted
        two_rows = self.data_spector.exog[4:6, :]
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                two_rows, np.array([1, 0]).reshape(-1, 1)),
            np.array([0, -np.inf]), decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.loglike_per_sample(
                two_rows, np.array([1, 1]).reshape(-1, 1)),
            np.array([0, 0]), decimal=3)
Example #46
0
class CrossEntropyMNLMultinomialTests(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        """Load the ANES96 dataset once and derive the two target matrices.

        Sets ``cls.data_anes96`` (the loaded dataset), ``cls.y`` (the
        binarized ``endog`` over classes 0..6) and ``cls.y_disturbed``
        (``(y + 0.01) / 1.07``).
        """
        dataset = sm.datasets.anes96.load()
        class_ids = list(range(7))
        binarized = label_binarize(dataset.endog, classes=class_ids)

        cls.data_anes96 = dataset
        cls.y = binarized
        # If each row of y is one-hot, (1 + 7 * 0.01) / 1.07 == 1, so the
        # disturbed rows still sum to one.
        cls.y_disturbed = old_div((binarized + 0.01), 1.07)

    def test_label_encoder(self):
        """Check ``CrossEntropyMNL._label_encoder`` on a 3x3 toy problem.

        For both a one-hot and a soft target matrix the encoder is
        expected to return each input row three times, the labels
        0, 1, 2 cycled per row, and the listed per-copy weights.
        """
        x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        # The repeated rows and labels are the same in both scenarios.
        expected_x = np.array([
            [1, 2, 3], [1, 2, 3], [1, 2, 3],
            [4, 5, 6], [4, 5, 6], [4, 5, 6],
            [7, 8, 9], [7, 8, 9], [7, 8, 9]])
        expected_labels = np.array([0, 1, 2, 0, 1, 2, 0, 1, 2])

        # One-hot targets with unit sample weights.
        y = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        encoded = CrossEntropyMNL._label_encoder(x, y, np.ones(3))
        X_repeated, Y_repeated, sample_weight_repeated = encoded
        np.testing.assert_array_equal(X_repeated, expected_x)
        np.testing.assert_array_equal(Y_repeated, expected_labels)
        np.testing.assert_array_equal(
            sample_weight_repeated,
            np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]))

        # Soft targets with non-uniform sample weights.
        y = np.array([[0.5, 0.25, 0.25], [0.25, 0.5, 0.25], [0.25, 0.25, 0.5]])
        sample_weight = np.array([0.25, 0.5, 0.25])
        encoded = CrossEntropyMNL._label_encoder(x, y, sample_weight)
        X_repeated, Y_repeated, sample_weight_repeated = encoded
        np.testing.assert_array_equal(X_repeated, expected_x)
        np.testing.assert_array_equal(Y_repeated, expected_labels)
        np.testing.assert_array_equal(
            sample_weight_repeated,
            np.array([0.125, 0.0625, 0.0625, 0.125, 0.25, 0.125, 0.0625, 0.0625, 0.125]))

    def test_lr(self):
        """Fit on the binarized labels (newton-cg, alpha=10) and pin outputs.

        Checks the number of correct predictions, the total
        log-likelihood, the solver recorded by ``to_json``, that
        ``from_json`` restores the coefficients, and the fitted
        ``classes`` / ``n_classes``.
        """
        exog = self.data_anes96.exog
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=10, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, self.y)

        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 333)

        # loglike
        total_ll = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_ll, -1540.888458338286, places=3)

        # to_json
        json_dict = self.model.to_json('./tests/linear_models/CrossentropyMNL/Multinomial/')
        self.assertEqual(json_dict['properties']['solver'], 'newton-cg')

        # from_json
        self.model_from_json = CrossEntropyMNL.from_json(json_dict)
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_from_json.coef, decimal=3)
        np.testing.assert_array_almost_equal(
            self.model.classes, np.arange(7), decimal=3)
        self.assertEqual(self.model.n_classes, 7)

    def test_lr_disturbed(self):
        """Fit on ``self.y_disturbed`` (newton-cg, alpha=10) and pin outputs.

        Checks the number of correct predictions and the total
        log-likelihood evaluated against the disturbed targets.
        """
        exog = self.data_anes96.exog
        targets = self.y_disturbed
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=10, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, targets)

        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 335)

        # loglike
        total_ll = self.model.loglike(exog, targets)
        self.assertAlmostEqual(total_ll, -1580.5280532302786, places=3)

    def test_lr_regularized(self):
        """Fit on the binarized labels with l2 and alpha=0.5; pin outputs.

        Checks the number of correct predictions and the total
        log-likelihood on the training data.
        """
        exog = self.data_anes96.exog
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=.5, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, self.y)

        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 369)

        # loglike
        total_ll = self.model.loglike(exog, self.y)
        self.assertAlmostEqual(total_ll, -1466.9886103092626, places=3)

    def test_lr_disturbed_regularized(self):
        """Fit on ``self.y_disturbed`` with l2 and alpha=0.5; pin outputs.

        Checks the number of correct predictions and the total
        log-likelihood evaluated against the disturbed targets.
        """
        exog = self.data_anes96.exog
        targets = self.y_disturbed
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=.5, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, targets)

        # predict
        n_correct = np.sum(self.model.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 366)

        # loglike
        total_ll = self.model.loglike(exog, targets)
        self.assertAlmostEqual(total_ll, -1519.9521131193064, places=3)

    def test_lr_sample_weight_all_half(self):
        """Compare a fit with uniform weight 0.5 against an unweighted fit.

        The two models must agree on coefficients to 3 decimals.  The
        half-weighted model's correct-prediction count is pinned, and the
        unweighted model's log-likelihood scored with weight 0.5 must be
        half of the reference value.
        """
        exog = self.data_anes96.exog
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        self.model_half = CrossEntropyMNL(**config)
        self.model_half.fit(exog, self.y, sample_weight=.5)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, self.y)

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

        # predict
        n_correct = np.sum(
            self.model_half.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 372)

        # loglike
        halved_ll = self.model.loglike(exog, self.y, sample_weight=.5)
        self.assertAlmostEqual(
            halved_ll, old_div(-1461.92274725, 2.), places=3)

    def test_lr_disturbed_sample_weight_all_half(self):
        """Disturbed-target variant of the uniform-weight-0.5 comparison.

        Both models are fit on ``self.y_disturbed``.  Their coefficients
        must agree to 3 decimals, the half-weighted model's
        correct-prediction count is pinned, and the unweighted model's
        log-likelihood scored with weight 0.5 must be half of the
        reference value.
        """
        exog = self.data_anes96.exog
        targets = self.y_disturbed
        config = dict(
            solver='newton-cg', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        self.model_half = CrossEntropyMNL(**config)
        self.model_half.fit(exog, targets, sample_weight=.5)
        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, targets)

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

        # predict
        n_correct = np.sum(
            self.model_half.predict(exog) == self.data_anes96.endog)
        self.assertEqual(n_correct, 367)

        # loglike
        halved_ll = self.model.loglike(exog, targets, sample_weight=.5)
        self.assertAlmostEqual(
            halved_ll, old_div(-1516.50148, 2.), places=3)

    def test_lr_sample_weight_all_zero(self):
        """Fitting with a sample weight of 0 is expected to raise ValueError."""
        # Build the model this test class is about (CrossEntropyMNL, with
        # n_classes) rather than DiscreteMNL, so the zero-weight check is
        # exercised on the class under test and with targets it accepts.
        self.model = CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        with self.assertRaises(ValueError):
            self.model.fit(
                self.data_anes96.exog, self.y_disturbed, sample_weight=0)

    def test_lr_sample_weight_half_zero_half_one(self):
        """Compare zero-weighting rows against dropping them.

        One model is fit on every row with weight 1 for the first 500
        rows and weight 0 for the rest; the other is fit on the first
        500 rows only.  Their coefficients must agree to 3 decimals.
        """
        len_half = 500
        exog = self.data_anes96.exog
        config = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        # Weight vector: ones for the leading rows, zeros for the remainder.
        n_masked = exog.shape[0] - len_half
        weights = np.array([1] * len_half + [0] * n_masked)

        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, self.y, sample_weight=weights)

        self.model_half = CrossEntropyMNL(**config)
        self.model_half.fit(exog[:len_half], self.y[:len_half])

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

    def test_lr_disturbed_sample_weight_half_zero_half_one(self):
        """Disturbed-target variant of the zero-weight vs. drop comparison.

        One model is fit on every row of ``self.y_disturbed`` with weight
        1 for the first 500 rows and weight 0 for the rest; the other is
        fit on the first 500 rows only.  Their coefficients must agree to
        3 decimals.
        """
        len_half = 500
        exog = self.data_anes96.exog
        targets = self.y_disturbed
        config = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        # Weight vector: ones for the leading rows, zeros for the remainder.
        n_masked = exog.shape[0] - len_half
        weights = np.array([1] * len_half + [0] * n_masked)

        self.model = CrossEntropyMNL(**config)
        self.model.fit(exog, targets, sample_weight=weights)

        self.model_half = CrossEntropyMNL(**config)
        self.model_half.fit(exog[:len_half], targets[:len_half])

        # coefficient
        np.testing.assert_array_almost_equal(
            self.model.coef, self.model_half.coef, decimal=3)

    # corner cases
    def test_lr_three_data_point(self):
        """Fit on three observations (rows 6-8) with l2, alpha=0.1.

        Pins the coefficient shape to (7, 6) and the per-sample
        log-likelihoods of the three training rows against their own
        targets and against two alternative label assignments.
        """
        # with regularization
        self.model = CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=.1, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model.fit(self.data_anes96.exog[6:9, :],
                       self.y[6:9, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (7, 6))
        # loglike_per_sample
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :], self.y[6:9, ]),
            np.array([-0.015, -0.091, -0.095]), decimal=3)
        # `classes` is passed by keyword: recent scikit-learn releases make
        # it keyword-only, and this matches the call in setUpClass.
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :],
            label_binarize([3, 1, 4], classes=list(range(7)))),
            np.array([-4.201, -5.094, -2.825]), decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :],
            label_binarize([3, 0, 5], classes=list(range(7)))),
            np.array([-4.201, -7.352, -8.957]), decimal=3)

    def test_lr_disturbed_three_data_point(self):
        """Fit on three disturbed-target observations (rows 6-8), alpha=0.1.

        Pins the coefficient shape to (7, 6) and the per-sample
        log-likelihoods of the three training rows against their own
        disturbed targets and against two hard label assignments.
        """
        # with regularization
        self.model = CrossEntropyMNL(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=.1, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)
        self.model.fit(self.data_anes96.exog[6:9, :],
                       self.y_disturbed[6:9, ], sample_weight=0.5)
        # coef
        self.assertEqual(self.model.coef.shape, (7, 6))
        # loglike_per_sample
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :], self.y_disturbed[6:9, ]),
            np.array([-0.336, -0.389, -0.398]), decimal=3)
        # `classes` is passed by keyword: recent scikit-learn releases make
        # it keyword-only, and this matches the call in setUpClass.
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :],
            label_binarize([3, 1, 4], classes=list(range(7)))),
            np.array([-3.415, -4.506, -2.367]), decimal=3)
        np.testing.assert_array_almost_equal(self.model.loglike_per_sample(
            self.data_anes96.exog[6:9, :],
            label_binarize([3, 0, 5], classes=list(range(7)))),
            np.array([-3.415, -4.492, -4.301]), decimal=3)

    def test_lr_multicolinearty(self):
        """Compare a fit on a duplicated feature with a fit on the single one.

        ``self.model_col`` is trained on the first exog column stacked
        twice and ``self.model`` on that column alone, both with sample
        weight 0.5.  Per-sample log-likelihoods and predictions must
        agree to 3 decimals.
        """
        single = self.data_anes96.exog[:, 0:1]
        X = np.hstack([single, single])
        config = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        self.model_col = CrossEntropyMNL(**config)
        self.model_col.fit(X, self.y, sample_weight=0.5)

        self.model = CrossEntropyMNL(**config)
        self.model.fit(single, self.y, sample_weight=0.5)

        # loglike_per_sample
        ll_duplicated = self.model_col.loglike_per_sample(X, self.y)
        ll_single = self.model.loglike_per_sample(single, self.y)
        np.testing.assert_array_almost_equal(
            ll_duplicated, ll_single, decimal=3)

        # predict
        pred_duplicated = self.model_col.predict(X)
        pred_single = self.model.predict(single)
        np.testing.assert_array_almost_equal(
            pred_duplicated, pred_single, decimal=3)

    def test_lr_disturbed_multicolinearty(self):
        """Disturbed-target variant of the duplicated-feature comparison.

        ``self.model_col`` is trained on the first exog column stacked
        twice and ``self.model`` on that column alone, both on
        ``self.y_disturbed`` with sample weight 0.5.  Per-sample
        log-likelihoods and predictions must agree to 3 decimals.
        """
        single = self.data_anes96.exog[:, 0:1]
        targets = self.y_disturbed
        X = np.hstack([single, single])
        config = dict(
            solver='lbfgs', fit_intercept=True, est_stderr=True,
            reg_method='l2', alpha=0, l1_ratio=0, tol=1e-4, max_iter=100,
            coef=None, stderr=None, n_classes=None)

        self.model_col = CrossEntropyMNL(**config)
        self.model_col.fit(X, targets, sample_weight=0.5)

        self.model = CrossEntropyMNL(**config)
        self.model.fit(single, targets, sample_weight=0.5)

        # loglike_per_sample
        ll_duplicated = self.model_col.loglike_per_sample(X, targets)
        ll_single = self.model.loglike_per_sample(single, targets)
        np.testing.assert_array_almost_equal(
            ll_duplicated, ll_single, decimal=3)

        # predict
        pred_duplicated = self.model_col.predict(X)
        pred_single = self.model.predict(single)
        np.testing.assert_array_almost_equal(
            pred_duplicated, pred_single, decimal=3)