Example 1
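Builds the list of preprocessors used before fitting: a LongitudinalSamplesFilter, followed by a LongitudinalFeaturesLagger when at least one lag is requested. Both are pinned to n_jobs=1 until parallel preprocessing is fixed.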
 def _construct_preprocessor_obj(self):
     # TODO later: fix parallel preprocessing
     preprocessors = list()
     preprocessors.append(LongitudinalSamplesFilter(n_jobs=1))
     if len(self.n_lags) > 0:
         preprocessors.append(
             LongitudinalFeaturesLagger(self.n_lags, n_jobs=1))
     return preprocessors
Example 2
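Lags the features, fits a ModelSCCS over two intervals, and checks the loss at a fixed coefficient vector against a hand-computed value.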
 def test_loss(self):
     """Test longitudinal multinomial model loss."""
     X, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags)\
         .fit_transform(self.X)
     model = ModelSCCS(n_intervals=2, n_lags=self.n_lags)\
         .fit(X, self.y)
     loss = model.loss(coeffs=np.array([0.0, 0.0, 1.0, 0.0]))
     expected_loss = -np.log((np.e / (2 * np.e) * 1 / (1 + np.e))) / 2
     self.assertAlmostEqual(loss, expected_loss)
Example 3
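Lags the features before drawing outcomes from either the Poisson or the multinomial simulator, depending on self.distribution.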
    def simulate_outcomes(self, features):
        features, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags). \
            fit_transform(features)

        if self.distribution == "poisson":
            # TODO later: add self.max_n_events to allow for multiple outcomes
            # In this case, the multinomial simulator should use this arg too
            outcomes = self._simulate_poisson_outcomes(features, self._coeffs)
        else:
            outcomes = self._simulate_multinomial_outcomes(
                features, self._coeffs)
        return outcomes
Example 4
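Checks the ModelSCCS gradient at a fixed coefficient vector against a hand-computed value, on two small dense samples.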
 def test_grad(self):
     """Test longitudinal multinomial model gradient value."""
     X = [np.array([[0, 0.], [1, 0]]), np.array([[1, 0.], [0, 1]])]
     X, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags) \
         .fit_transform(X)
     model = ModelSCCS(n_intervals=2, n_lags=self.n_lags) \
         .fit(X, self.y)
     grad = model.grad(coeffs=np.array([0.0, 0.0, 1.0, 0.0]))
     expected_grad = -np.array([
         -1 / 2 - 1 / (1 + np.e), 1 - np.e / (1 + np.e), 1 - np.e /
         (1 + np.e), 0
     ]) / 2
     np.testing.assert_almost_equal(grad, expected_grad, decimal=15)
Example 5
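Checks gradient/loss consistency on simulated data, for both dense and sparse (csr_matrix) features. Note that fit_transform is used here as if it returned the lagged features directly; this matches an older tick API than the three-value unpacking used in the other examples on this page.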
 def test_grad_loss_consistency(self):
     """Test longitudinal multinomial model gradient properties."""
     sim = SimuSCCS(500, 36, 3, 9, None, True, "infinite", seed=42,
                    verbose=False)
     X, y, censoring, coeffs = sim.simulate()
     X = LongitudinalFeaturesLagger(n_lags=9) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=36, n_lags=9)\
         .fit(X, y, censoring)
     self._test_grad(model, coeffs)
     X_sparse = [csr_matrix(x) for x in X]
     model = ModelSCCS(n_intervals=36, n_lags=9)\
         .fit(X_sparse, y, censoring)
     self._test_grad(model, coeffs)
Example 6
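Checks get_lip_max(), the model's maximum Lipschitz constant, on a small dense input with one lag per feature.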
 def test_lipschitz_constant(self):
     """Test longitudinal multinomial model Lipschitz constant."""
     X = [
         np.array([[0, 0, 1], [0, 1, 1], [1, 1, 1]], dtype="float64"),
         np.array([[0, 1, 1], [0, 1, 1], [1, 1, 1]], dtype="float64")
     ]
     y = [
         np.array([0, 1, 0], dtype="int32"),
         np.array([0, 1, 0], dtype="int32")
     ]
     n_lags = np.repeat(1, 3).astype(dtype="uint64")
     X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X)
     model = ModelSCCS(n_intervals=3, n_lags=n_lags).fit(X, y)
     lip_constant = model.get_lip_max()
     expected_lip_constant = .5
     self.assertEqual(lip_constant, expected_lip_constant)
Example 7
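Fits simulated data and checks that SVRG, with step size 1 / L from get_lip_max(), recovers the simulated coefficients to one decimal place. As in Example 5, fit_transform is used in its older single-return form.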
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence."""
     n_intervals = 10
     n_lags = 3
     n_samples = 5000
     n_features = 3
     sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                    True, "short", seed=42, verbose=False)
     X, y, censoring, coeffs = sim.simulate()
     X = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=n_intervals,
                       n_lags=n_lags).fit(X, y, censoring)
     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
     np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
Example 8
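Round-trips a fitted ModelSCCS through pickle and compares the underlying C++ model objects for equality.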
    def test_sccs_serialize_and_compare(self):
        """Test serialization (cereal/pickle) of SCCS."""
        X = [
            np.array([[0, 0, 1], [0, 1, 1], [1, 1, 1]], dtype="float64"),
            np.array([[0, 1, 1], [0, 1, 1], [1, 1, 1]], dtype="float64")
        ]
        y = [
            np.array([0, 1, 0], dtype="int32"),
            np.array([0, 1, 0], dtype="int32")
        ]
        n_lags = np.repeat(1, 3).astype(dtype="uint64")
        X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
            .fit_transform(X)
        model = ModelSCCS(n_intervals=3, n_lags=n_lags).fit(X, y)

        pickled = pickle.loads(pickle.dumps(model))

        self.assertTrue(model._model.compare(pickled._model))
Example 9
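Same gradient-consistency test as Example 5, but with a per-feature n_lags array and a newer SimuSCCS whose simulate() returns five values, the first of which is discarded here.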
 def test_grad_loss_consistency(self):
     """Test longitudinal multinomial model gradient properties."""
     n_lags = np.repeat(9, 3).astype(dtype="uint64")
     sim = SimuSCCS(500,
                    36,
                    3,
                    n_lags,
                    None,
                    "single_exposure",
                    seed=42,
                    verbose=False)
     _, X, y, censoring, coeffs = sim.simulate()
     coeffs = np.hstack(coeffs)
     X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=36, n_lags=n_lags)\
         .fit(X, y, censoring)
     self._test_grad(model, coeffs)
     X_sparse = [csr_matrix(x) for x in X]
     model = ModelSCCS(n_intervals=36, n_lags=n_lags)\
         .fit(X_sparse, y, censoring)
     self._test_grad(model, coeffs)
Example 10
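Convergence test with per-feature lags, followed by test_sparse_pre_convolution, which checks that lagging sparse features under censoring matches the expected dense output.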
 def test_convergence_with_lags(self):
     """Test longitudinal multinomial model convergence."""
     n_intervals = 10
     n_samples = 800
     n_features = 2
     n_lags = np.repeat(2, n_features).astype(dtype="uint64")
     sim = SimuSCCS(n_samples,
                    n_intervals,
                    n_features,
                    n_lags,
                    None,
                    "multiple_exposures",
                    seed=42)
     _, X, y, censoring, coeffs = sim.simulate()
     coeffs = np.hstack(coeffs)
     X, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
         .fit_transform(X, censoring)
     model = ModelSCCS(n_intervals=n_intervals,
                       n_lags=n_lags).fit(X, y, censoring)
     solver = SVRG(max_iter=15, verbose=False)
     solver.set_model(model).set_prox(ProxZero())
     coeffs_svrg = solver.solve(step=1 / model.get_lip_max())
     np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)

 def test_sparse_pre_convolution(self):
     feat_prod, _, _ = LongitudinalFeaturesLagger(n_lags=self.n_lags)\
         .fit_transform(self.sparse_features, censoring=self.censoring)
     feat_prod = [f.todense() for f in feat_prod]
     np.testing.assert_equal(feat_prod, self.expected_output)
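None of the snippets above carry their imports. A minimal preamble covering all ten, assuming the module layout of a recent tick release (older releases exposed the same classes under tick.optim.* and tick.simulation), would be:

 import pickle

 import numpy as np
 from scipy.sparse import csr_matrix

 from tick.preprocessing import LongitudinalFeaturesLagger, \
     LongitudinalSamplesFilter
 from tick.prox import ProxZero
 from tick.solver import SVRG
 from tick.survival import ModelSCCS, SimuSCCS

And a minimal standalone call, as a sketch of the newer three-value fit_transform; the shapes and values here are illustrative, not taken from the examples above:

 # One (n_intervals, n_features) float array per sample.
 features = [np.zeros((3, 2)), np.ones((3, 2))]
 n_lags = np.array([1, 1], dtype="uint64")
 lagged, _, _ = LongitudinalFeaturesLagger(n_lags=n_lags) \
     .fit_transform(features)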