def test_squared_loss_staged_predict(self):
    """Check that the final staged prediction matches ``predict``.

    The check runs twice on the same estimator: once as constructed and
    once more after setting ``dropout_rate=0.03`` and refitting.
    """

    def check_last_stage_equals_predict(estimator):
        # Refit and iterate over all stages: every staged output must have
        # the shape of the final prediction.
        estimator.fit(self.x, self.y)
        expected = estimator.predict(self.x)
        for staged in estimator.staged_predict(self.x):
            self.assertTupleEqual(staged.shape, expected.shape)
        # ``staged`` still refers to the output of the last stage here,
        # which has to equal what ``predict`` returned.
        assert_array_equal(expected, staged)

    gbsa = GradientBoostingSurvivalAnalysis(
        loss="squared", n_estimators=100, max_depth=3, random_state=0
    )
    check_last_stage_equals_predict(gbsa)

    gbsa.set_params(dropout_rate=0.03)
    check_last_stage_equals_predict(gbsa)
def test_squared_loss_staged_predict(make_whas500):
    """Verify that the last output of ``staged_predict`` equals ``predict``.

    Uses the data returned by the ``make_whas500`` fixture and repeats the
    verification after enabling ``dropout_rate=0.03`` on the same estimator.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    def assert_final_stage_matches(estimator):
        # Fit on the fixture data and take the plain prediction as reference.
        estimator.fit(data.x, data.y)
        reference = estimator.predict(data.x)

        # Each intermediate stage must already have the reference's shape.
        for stage_output in estimator.staged_predict(data.x):
            assert stage_output.shape == reference.shape

        # After the loop ``stage_output`` holds the final stage.
        assert_array_equal(reference, stage_output)

    estimator = GradientBoostingSurvivalAnalysis(
        loss="squared",
        n_estimators=100,
        max_depth=3,
        random_state=0,
    )
    assert_final_stage_matches(estimator)

    estimator.set_params(dropout_rate=0.03)
    assert_final_stage_matches(estimator)
def test_fit_int_param_as_float(make_whas500):
    """Fit with integer hyper-parameters supplied as floats.

    Confirms that the float-valued parameters compare equal to their integer
    counterparts via ``get_params`` and that the fitted model's predictions
    pass ``assert_cindex_almost_equal`` against the stored reference tuple.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    # Account for https://github.com/scikit-learn/scikit-learn/pull/12344
    max_depth = 3 if _sklearn_version_under_0p21 else 4

    estimator = GradientBoostingSurvivalAnalysis(
        n_estimators=100.0,
        max_depth=float(max_depth),
        min_samples_split=10.0,
        random_state=0,
    )

    # Parameters passed as floats must compare equal to the integer values.
    reported = estimator.get_params()
    expected_values = (
        ("n_estimators", 100),
        ("max_depth", max_depth),
        ("min_samples_split", 10),
    )
    for param_name, expected in expected_values:
        assert expected == reported[param_name]

    estimator.set_params(max_leaf_nodes=15.0)
    assert 15 == estimator.get_params()["max_leaf_nodes"]

    estimator.fit(data.x, data.y)
    risk_scores = estimator.predict(data.x)

    reference = (0.90256690042449006, 67826, 7321, 2, 14)
    assert_cindex_almost_equal(
        data.y['fstat'], data.y['lenfol'], risk_scores, reference
    )
def test_negative_ccp_alpha(make_whas500):
    """Fitting with a negative ``ccp_alpha`` must raise ``ValueError``.

    The expected message is matched against
    ``"ccp_alpha must be greater than or equal to 0"``.
    """
    whas500_data = make_whas500(with_std=False, to_numeric=True)

    clf = GradientBoostingSurvivalAnalysis()
    msg = "ccp_alpha must be greater than or equal to 0"

    # Apply the invalid value outside the ``raises`` block so that only the
    # call under test (``fit``) can satisfy the expectation; with both calls
    # inside, the test could not tell which of them raised.
    clf.set_params(ccp_alpha=-1.0)

    with pytest.raises(ValueError, match=msg):
        clf.fit(whas500_data.x, whas500_data.y)
def test_fit_int_param_as_float(self):
    """Fit with integer hyper-parameters supplied as floats.

    Checks the values reported by ``get_params`` and compares the result of
    ``concordance_index_censored`` on the predictions with reference values.
    """
    estimator = GradientBoostingSurvivalAnalysis(
        n_estimators=100.0,
        max_depth=3.0,
        min_samples_split=10.0,
        random_state=0,
    )

    # Float arguments must compare equal to the corresponding integers.
    reported = estimator.get_params()
    for param_name, expected in (
        ("n_estimators", 100),
        ("max_depth", 3),
        ("min_samples_split", 10),
    ):
        self.assertEqual(expected, reported[param_name])

    estimator.set_params(max_leaf_nodes=15.0)
    self.assertEqual(15, estimator.get_params()["max_leaf_nodes"])

    estimator.fit(self.x, self.y)
    risk_scores = estimator.predict(self.x)

    reference = numpy.array([0.90256690042449006, 67826, 7321, 2, 119])
    observed = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], risk_scores
    )
    assert_array_almost_equal(reference, numpy.array(observed))
def test_fit_int_param_as_float(make_whas500):
    """Fit with integer hyper-parameters supplied as floats.

    The float-valued settings must compare equal to their integer
    counterparts, and the predictions of the fitted model are passed to
    ``assert_cindex_almost_equal`` together with the stored reference tuple.
    """
    dataset = make_whas500(with_std=False, to_numeric=True)
    features, outcome = dataset.x, dataset.y

    float_settings = {
        "n_estimators": 100.0,
        "max_depth": 3.0,
        "min_samples_split": 10.0,
    }
    gbsa = GradientBoostingSurvivalAnalysis(random_state=0, **float_settings)

    reported = gbsa.get_params()
    assert reported["n_estimators"] == 100
    assert reported["max_depth"] == 3
    assert reported["min_samples_split"] == 10

    # The same must hold for a parameter changed after construction.
    gbsa.set_params(max_leaf_nodes=15.0)
    assert gbsa.get_params()["max_leaf_nodes"] == 15

    gbsa.fit(features, outcome)
    predicted = gbsa.predict(features)

    assert_cindex_almost_equal(
        outcome['fstat'],
        outcome['lenfol'],
        predicted,
        (0.90256690042449006, 67826, 7321, 2, 119),
    )
def test_max_features(make_whas500):
    """Exercise the accepted and rejected values of ``max_features``.

    For each accepted setting the model is refitted and ``max_features_`` is
    compared with the expected feature count; for each rejected setting,
    ``fit`` must raise ``ValueError`` with a matching message.
    """
    data = make_whas500(with_std=False, to_numeric=True)
    n_features = data.x.shape[1]

    gbsa = GradientBoostingSurvivalAnalysis(
        n_estimators=10, max_features="auto", max_depth=3, random_state=0
    )
    gbsa.fit(data.x, data.y)
    assert gbsa.max_features_ == n_features

    # (setting, expected value of ``max_features_`` after fitting)
    accepted = (
        ("sqrt", int(numpy.sqrt(n_features))),
        ("log2", int(numpy.log2(n_features))),
        (0.25, int(0.25 * n_features)),
        (5, 5),
    )
    for setting, expected in accepted:
        gbsa.set_params(max_features=setting)
        gbsa.fit(data.x, data.y)
        assert round(abs(gbsa.max_features_ - expected), 7) == 0

    # (setting, regular expression the error message has to match)
    rejected = (
        (-1, r"max_features must be in \(0, n_features\]"),
        (-1.125, r"max_features must be in \(0, 1.0\]"),
        (
            "fail_me",
            "Invalid value for max_features: 'fail_me'. "
            "Allowed string values are 'auto', 'sqrt' "
            "or 'log2'",
        ),
    )
    for setting, pattern in rejected:
        gbsa.set_params(max_features=setting)
        with pytest.raises(ValueError, match=pattern):
            gbsa.fit(data.x, data.y)
def test_max_features(self):
    """Exercise the accepted and rejected values of ``max_features``.

    Accepted settings are refitted and ``max_features_`` is compared with the
    expected feature count; rejected settings must make ``fit`` raise
    ``ValueError`` with a message matching the given regular expression.
    """
    model = GradientBoostingSurvivalAnalysis(n_estimators=10, max_features="auto", max_depth=3, random_state=0)
    model.fit(self.x, self.y)

    self.assertEqual(model.max_features_, self.x.shape[1])

    model.set_params(max_features="sqrt")
    model.fit(self.x, self.y)
    self.assertAlmostEqual(model.max_features_, int(numpy.sqrt(self.x.shape[1])))

    model.set_params(max_features="log2")
    model.fit(self.x, self.y)
    self.assertAlmostEqual(model.max_features_, int(numpy.log2(self.x.shape[1])))

    model.set_params(max_features=0.25)
    model.fit(self.x, self.y)
    self.assertAlmostEqual(model.max_features_, int(0.25 * self.x.shape[1]))

    model.set_params(max_features=5)
    model.fit(self.x, self.y)
    self.assertAlmostEqual(model.max_features_, 5)

    # The patterns below are raw strings: ``\(`` and ``\]`` are regex escapes,
    # not valid Python string escapes, and would otherwise trigger an
    # invalid-escape-sequence warning when the module is compiled.
    model.set_params(max_features=-1)
    self.assertRaisesRegex(ValueError, r"max_features must be in \(0, n_features\]",
                           model.fit, self.x, self.y)

    model.set_params(max_features=-1.125)
    self.assertRaisesRegex(ValueError, r"max_features must be in \(0, 1.0\]",
                           model.fit, self.x, self.y)

    model.set_params(max_features="fail_me")
    self.assertRaisesRegex(ValueError, "Invalid value for max_features: 'fail_me'. "
                                       "Allowed string values are 'auto', 'sqrt' "
                                       "or 'log2'",
                           model.fit, self.x, self.y)