def _construct_preprocessor_obj(self):
    """Build the list of preprocessor objects.

    A ``LongitudinalSamplesFilter`` is always the first element. A
    ``LongitudinalFeaturesLagger`` built from ``self.n_lags`` is appended
    only when ``self.n_lags`` has at least one entry.

    Returns
    -------
    list
        The preprocessor instances: the samples filter first, then the
        features lagger when present.
    """
    # TODO later: fix parallel preprocessing
    steps = [LongitudinalSamplesFilter(n_jobs=1)]

    # Explicit length test on purpose: `n_lags` may be array-like, in which
    # case plain truthiness would not be equivalent.
    if not len(self.n_lags) > 0:
        return steps

    steps.append(LongitudinalFeaturesLagger(self.n_lags, n_jobs=1))
    return steps
def test_loss(self):
    """Check the SCCS model loss against a hand-computed value.

    The features in ``self.X`` are lagged with ``self.n_lags``, a
    two-interval ``ModelSCCS`` is fitted on them with ``self.y``, and the
    loss at a fixed coefficient vector is compared to the closed-form
    expression below.
    """
    lagger = LongitudinalFeaturesLagger(n_lags=self.n_lags)
    lagged_features, _, _ = lagger.fit_transform(self.X)

    sccs = ModelSCCS(n_intervals=2, n_lags=self.n_lags)
    model = sccs.fit(lagged_features, self.y)

    coeffs = np.array([0.0, 0.0, 1.0, 0.0])
    loss = model.loss(coeffs=coeffs)

    # Same arithmetic as a single expression, split for readability.
    log_argument = np.e / (2 * np.e) * 1 / (1 + np.e)
    expected_loss = -np.log(log_argument) / 2

    self.assertAlmostEqual(loss, expected_loss)
def simulate_outcomes(self, features):
    """Simulate outcomes from the given features.

    The features are first transformed by a ``LongitudinalFeaturesLagger``
    configured with ``self.n_lags``. The lagged features and
    ``self._coeffs`` are then handed to the Poisson simulator when
    ``self.distribution == "poisson"``, and to the multinomial simulator
    in every other case.

    Parameters
    ----------
    features
        Input passed unchanged to ``LongitudinalFeaturesLagger.fit_transform``.

    Returns
    -------
    The value returned by the selected simulator.
    """
    lagger = LongitudinalFeaturesLagger(n_lags=self.n_lags)
    lagged_features, _, _ = lagger.fit_transform(features)

    if self.distribution == "poisson":
        # TODO later: add self.max_n_events to allow for multiple outcomes
        # In this case, the multinomial simulator should use this arg too
        return self._simulate_poisson_outcomes(lagged_features,
                                               self._coeffs)

    return self._simulate_multinomial_outcomes(lagged_features,
                                               self._coeffs)
def test_grad(self):
    """Check the SCCS model gradient against a hand-computed value.

    Two small 2x2 feature matrices are lagged with ``self.n_lags``, a
    two-interval ``ModelSCCS`` is fitted with ``self.y``, and the gradient
    at a fixed coefficient vector is compared to the expected array with
    15 decimals of precision.
    """
    raw_features = [
        np.array([[0, 0.], [1, 0]]),
        np.array([[1, 0.], [0, 1]]),
    ]
    lagger = LongitudinalFeaturesLagger(n_lags=self.n_lags)
    lagged_features, _, _ = lagger.fit_transform(raw_features)

    sccs = ModelSCCS(n_intervals=2, n_lags=self.n_lags)
    model = sccs.fit(lagged_features, self.y)

    coeffs = np.array([0.0, 0.0, 1.0, 0.0])
    grad = model.grad(coeffs=coeffs)

    # The second and third expected components share the same value.
    repeated_term = 1 - np.e / (1 + np.e)
    unscaled = np.array([
        -1 / 2 - 1 / (1 + np.e),
        repeated_term,
        repeated_term,
        0,
    ])
    expected_grad = -unscaled / 2

    np.testing.assert_almost_equal(grad, expected_grad, decimal=15)
def test_grad_loss_consistency(self):
    """Check gradient properties on simulated data, dense then sparse.

    Data is simulated with ``SimuSCCS`` (seed 42), lagged, and used to fit
    a ``ModelSCCS`` twice: once on the dense lagged features and once on
    their ``csr_matrix`` conversions. Each fitted model is passed to
    ``self._test_grad`` together with the simulated coefficients.
    """
    n_intervals = 36
    n_lags = 9

    sim = SimuSCCS(500, n_intervals, 3, n_lags, None, True, "infinite",
                   seed=42, verbose=False)
    X, y, censoring, coeffs = sim.simulate()

    # NOTE(review): the result of fit_transform is used directly here, with
    # no tuple unpacking -- confirm this matches the
    # LongitudinalFeaturesLagger version this test targets.
    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    X = lagger.fit_transform(X, censoring)

    def fit_and_check(features):
        # Fit a fresh model on `features` and run the shared gradient check.
        fitted = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
        fitted = fitted.fit(features, y, censoring)
        self._test_grad(fitted, coeffs)

    fit_and_check(X)
    fit_and_check([csr_matrix(dense) for dense in X])
def test_lipschitz_constant(self):
    """Check the value returned by ``ModelSCCS.get_lip_max``.

    Two 3x3 float64 feature matrices and two int32 label vectors are
    lagged with one lag per feature and used to fit a three-interval
    model; ``get_lip_max()`` is expected to return exactly ``0.5``.
    """
    feature_rows = (
        [[0, 0, 1], [0, 1, 1], [1, 1, 1]],
        [[0, 1, 1], [0, 1, 1], [1, 1, 1]],
    )
    features = [np.array(rows, dtype="float64") for rows in feature_rows]
    # Two distinct arrays holding the same labels.
    labels = [np.array([0, 1, 0], dtype="int32") for _ in range(2)]

    n_lags = np.repeat(1, 3).astype(dtype="uint64")

    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    lagged_features, _, _ = lagger.fit_transform(features)

    model = ModelSCCS(n_intervals=3, n_lags=n_lags)
    model = model.fit(lagged_features, labels)

    self.assertEqual(model.get_lip_max(), .5)
def test_convergence_with_lags(self):
    """Check that SVRG recovers the simulated coefficients.

    Data is simulated with ``SimuSCCS`` (seed 42), lagged, and used to fit
    a ``ModelSCCS``. An ``SVRG`` solver with ``ProxZero`` is run for at
    most 15 iterations with step ``1 / get_lip_max()``, and its solution
    is compared to the simulated coefficients to one decimal.
    """
    n_samples, n_intervals, n_features, n_lags = 5000, 10, 3, 3

    sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None, True,
                   "short", seed=42, verbose=False)
    X, y, censoring, coeffs = sim.simulate()

    # NOTE(review): the result of fit_transform is used directly here, with
    # no tuple unpacking -- confirm this matches the
    # LongitudinalFeaturesLagger version this test targets.
    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    X = lagger.fit_transform(X, censoring)

    model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
    model = model.fit(X, y, censoring)

    solver = SVRG(max_iter=15, verbose=False)
    with_model = solver.set_model(model)
    with_model.set_prox(ProxZero())

    step = 1 / model.get_lip_max()
    coeffs_svrg = solver.solve(step=step)

    np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
def test_sccs_serialize_and_compare(self):
    """Test serialization (cereal/pickle) of SCCS.

    A small model is fitted, round-tripped through ``pickle.dumps`` /
    ``pickle.loads``, and the underlying ``_model`` objects of the original
    and the restored instance are compared with ``compare``.
    """
    feature_rows = (
        [[0, 0, 1], [0, 1, 1], [1, 1, 1]],
        [[0, 1, 1], [0, 1, 1], [1, 1, 1]],
    )
    features = [np.array(rows, dtype="float64") for rows in feature_rows]
    # Two distinct arrays holding the same labels.
    labels = [np.array([0, 1, 0], dtype="int32") for _ in range(2)]

    n_lags = np.repeat(1, 3).astype(dtype="uint64")

    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    lagged_features, _, _ = lagger.fit_transform(features)

    model = ModelSCCS(n_intervals=3, n_lags=n_lags)
    model = model.fit(lagged_features, labels)

    # The payload is produced locally just above, so loading it is safe.
    payload = pickle.dumps(model)
    restored = pickle.loads(payload)

    self.assertTrue(model._model.compare(restored._model))
def test_grad_loss_consistency(self):
    """Check gradient properties on simulated data, dense then sparse.

    Data is simulated with ``SimuSCCS`` in ``"single_exposure"`` mode
    (seed 42); the simulated coefficients are flattened with ``np.hstack``.
    The lagged features are used to fit a ``ModelSCCS`` twice, once dense
    and once converted to ``csr_matrix``, and each fitted model is passed
    to ``self._test_grad``.
    """
    n_intervals = 36
    n_lags = np.repeat(9, 3).astype(dtype="uint64")

    sim = SimuSCCS(500, n_intervals, 3, n_lags, None, "single_exposure",
                   seed=42, verbose=False)
    # The first value returned by simulate() is not needed by this test.
    _, raw_features, y, censoring, simulated_coeffs = sim.simulate()
    coeffs = np.hstack(simulated_coeffs)

    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    X, _, _ = lagger.fit_transform(raw_features, censoring)

    def fit_and_check(features):
        # Fit a fresh model on `features` and run the shared gradient check.
        fitted = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
        fitted = fitted.fit(features, y, censoring)
        self._test_grad(fitted, coeffs)

    fit_and_check(X)
    fit_and_check([csr_matrix(dense) for dense in X])
def test_convergence_with_lags(self):
    """Check that SVRG recovers the simulated coefficients.

    Data is simulated with ``SimuSCCS`` in ``"multiple_exposures"`` mode
    (seed 42); the simulated coefficients are flattened with ``np.hstack``.
    A ``ModelSCCS`` is fitted on the lagged features, then an ``SVRG``
    solver with ``ProxZero`` is run for at most 15 iterations with step
    ``1 / get_lip_max()``. The solution must match the simulated
    coefficients to one decimal.
    """
    n_samples, n_intervals, n_features = 800, 10, 2
    n_lags = np.repeat(2, n_features).astype(dtype="uint64")

    sim = SimuSCCS(n_samples, n_intervals, n_features, n_lags, None,
                   "multiple_exposures", seed=42)
    # The first value returned by simulate() is not needed by this test.
    _, raw_features, y, censoring, simulated_coeffs = sim.simulate()
    coeffs = np.hstack(simulated_coeffs)

    lagger = LongitudinalFeaturesLagger(n_lags=n_lags)
    X, _, _ = lagger.fit_transform(raw_features, censoring)

    model = ModelSCCS(n_intervals=n_intervals, n_lags=n_lags)
    model = model.fit(X, y, censoring)

    solver = SVRG(max_iter=15, verbose=False)
    with_model = solver.set_model(model)
    with_model.set_prox(ProxZero())

    step = 1 / model.get_lip_max()
    coeffs_svrg = solver.solve(step=step)

    np.testing.assert_almost_equal(coeffs, coeffs_svrg, decimal=1)
def test_sparse_pre_convolution(self):
    """Check the lagger output on sparse features with censoring.

    ``self.sparse_features`` are lagged with ``self.n_lags`` and
    ``self.censoring``; each resulting matrix is densified with
    ``todense()`` and the list is compared to ``self.expected_output``.
    """
    lagger = LongitudinalFeaturesLagger(n_lags=self.n_lags)
    lagged, _, _ = lagger.fit_transform(self.sparse_features,
                                        censoring=self.censoring)
    densified = [matrix.todense() for matrix in lagged]
    np.testing.assert_equal(densified, self.expected_output)