Beispiel #1
0
    def test_squared_loss_staged_predict(self):
        """Check that the last result of ``staged_predict`` equals ``predict``.

        The comparison is made twice: once with the model as constructed and
        once more after ``dropout_rate`` has been set to 0.03 and the model
        has been fitted again.
        """
        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)

        # ``None`` stands for "leave the constructor settings untouched".
        for dropout_rate in (None, 0.03):
            if dropout_rate is not None:
                estimator.set_params(dropout_rate=dropout_rate)

            estimator.fit(self.x, self.y)
            expected = estimator.predict(self.x)

            # Every yielded array must have the shape of the final prediction.
            for staged in estimator.staged_predict(self.x):
                self.assertTupleEqual(staged.shape, expected.shape)

            # After the loop ``staged`` holds the last yielded array.
            assert_array_equal(expected, staged)
Beispiel #2
0
    def test_squared_loss_staged_predict(make_whas500):
        """Check that the last result of ``staged_predict`` equals ``predict``.

        Runs the check on the model as constructed and again after setting
        ``dropout_rate`` to 0.03, using the data returned by ``make_whas500``.
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared",
            n_estimators=100,
            max_depth=3,
            random_state=0,
        )

        def fit_and_compare():
            # Fit, predict, then iterate over all staged predictions; each one
            # must have the same shape as the output of ``predict``.
            estimator.fit(whas500_data.x, whas500_data.y)
            expected = estimator.predict(whas500_data.x)

            for staged in estimator.staged_predict(whas500_data.x):
                assert staged.shape == expected.shape

            # ``staged`` now refers to the last yielded array.
            assert_array_equal(expected, staged)

        fit_and_compare()

        estimator.set_params(dropout_rate=0.03)
        fit_and_compare()
    def test_fit_int_param_as_float(make_whas500):
        """Check that float-valued integer parameters are accepted by the model.

        Parameters given as floats must compare equal to their integer values
        in ``get_params``, and the fitted model must reach the expected
        concordance statistics on the data returned by ``make_whas500``.
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        # Account for https://github.com/scikit-learn/scikit-learn/pull/12344:
        # depth 3 is used when ``_sklearn_version_under_0p21`` is set, else 4.
        max_depth = 3 if _sklearn_version_under_0p21 else 4

        float_settings = {
            "n_estimators": 100.0,
            "max_depth": float(max_depth),
            "min_samples_split": 10.0,
        }
        model = GradientBoostingSurvivalAnalysis(random_state=0, **float_settings)

        reported = model.get_params()
        expected_ints = (
            ("n_estimators", 100),
            ("max_depth", max_depth),
            ("min_samples_split", 10),
        )
        for name, expected in expected_ints:
            assert expected == reported[name]

        model.set_params(max_leaf_nodes=15.0)
        assert 15 == model.get_params()["max_leaf_nodes"]

        model.fit(whas500_data.x, whas500_data.y)
        prediction = model.predict(whas500_data.x)

        expected_cindex = (0.90256690042449006, 67826, 7321, 2, 14)
        assert_cindex_almost_equal(
            whas500_data.y['fstat'],
            whas500_data.y['lenfol'],
            prediction,
            expected_cindex,
        )
    def test_negative_ccp_alpha(make_whas500):
        """Check that fitting with a negative ``ccp_alpha`` raises ``ValueError``.

        The error message must match
        "ccp_alpha must be greater than or equal to 0".
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        clf = GradientBoostingSurvivalAnalysis()
        # Configure the invalid value outside the ``pytest.raises`` block so
        # that only the ``fit`` call can satisfy the expected-error check.
        clf.set_params(ccp_alpha=-1.0)

        msg = "ccp_alpha must be greater than or equal to 0"
        with pytest.raises(ValueError, match=msg):
            clf.fit(whas500_data.x, whas500_data.y)
    def test_fit_int_param_as_float(self):
        """Check that float-valued integer parameters are accepted by the model.

        Parameters given as floats must compare equal to their integer values
        in ``get_params``, and the fitted model must reproduce the expected
        result of ``concordance_index_censored`` on ``self.x`` / ``self.y``.
        """
        model = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0,
            max_depth=3.0,
            min_samples_split=10.0,
            random_state=0,
        )

        reported = model.get_params()
        expected_ints = (
            ("n_estimators", 100),
            ("max_depth", 3),
            ("min_samples_split", 10),
        )
        for name, expected in expected_ints:
            self.assertEqual(expected, reported[name])

        model.set_params(max_leaf_nodes=15.0)
        self.assertEqual(15, model.get_params()["max_leaf_nodes"])

        model.fit(self.x, self.y)
        prediction = model.predict(self.x)

        # The reference values are compared element-wise, up to the tolerance
        # of ``assert_array_almost_equal``.
        expected_cindex = numpy.array([0.90256690042449006, 67826, 7321, 2, 119])
        actual_cindex = numpy.array(
            concordance_index_censored(self.y['fstat'], self.y['lenfol'], prediction))
        assert_array_almost_equal(expected_cindex, actual_cindex)
Beispiel #6
0
    def test_fit_int_param_as_float(make_whas500):
        """Check that float-valued integer parameters are accepted by the model.

        Parameters given as floats must compare equal to their integer values
        in ``get_params``, and the fitted model must reach the expected
        concordance statistics on the data returned by ``make_whas500``.
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        model = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0,
            max_depth=3.0,
            min_samples_split=10.0,
            random_state=0,
        )

        reported = model.get_params()
        for name, expected in (("n_estimators", 100),
                               ("max_depth", 3),
                               ("min_samples_split", 10)):
            assert expected == reported[name]

        model.set_params(max_leaf_nodes=15.0)
        assert 15 == model.get_params()["max_leaf_nodes"]

        model.fit(whas500_data.x, whas500_data.y)
        prediction = model.predict(whas500_data.x)

        expected_cindex = (0.90256690042449006, 67826, 7321, 2, 119)
        assert_cindex_almost_equal(
            whas500_data.y['fstat'],
            whas500_data.y['lenfol'],
            prediction,
            expected_cindex,
        )
Beispiel #7
0
    def test_max_features(make_whas500):
        """Check how ``max_features`` settings are resolved or rejected by ``fit``.

        Valid settings ("auto", "sqrt", "log2", a float fraction, an integer)
        are compared against the expected ``max_features_`` after fitting.
        Invalid settings (-1, -1.125, an unknown string) must make ``fit``
        raise ``ValueError`` with the corresponding message.
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)
        n_features = whas500_data.x.shape[1]

        model = GradientBoostingSurvivalAnalysis(
            n_estimators=10, max_features="auto", max_depth=3, random_state=0)
        model.fit(whas500_data.x, whas500_data.y)

        # With "auto" the fitted value must equal the number of columns.
        assert model.max_features_ == n_features

        # (setting, expected ``max_features_`` after fitting)
        valid_cases = (
            ("sqrt", int(numpy.sqrt(n_features))),
            ("log2", int(numpy.log2(n_features))),
            (0.25, int(0.25 * n_features)),
            (5, 5),
        )
        for setting, expected in valid_cases:
            model.set_params(max_features=setting)
            model.fit(whas500_data.x, whas500_data.y)
            assert round(abs(model.max_features_ - expected), 7) == 0

        # (setting, regular expression the error message must match)
        invalid_cases = (
            (-1, r"max_features must be in \(0, n_features\]"),
            (-1.125, r"max_features must be in \(0, 1.0\]"),
            ("fail_me",
             "Invalid value for max_features: 'fail_me'. "
             "Allowed string values are 'auto', 'sqrt' "
             "or 'log2'"),
        )
        for setting, pattern in invalid_cases:
            model.set_params(max_features=setting)
            with pytest.raises(ValueError, match=pattern):
                model.fit(whas500_data.x, whas500_data.y)
    def test_max_features(self):
        """Check how ``max_features`` settings are resolved or rejected by ``fit``.

        Valid settings ("auto", "sqrt", "log2", a float fraction, an integer)
        are compared against the expected ``max_features_`` after fitting.
        Invalid settings (-1, -1.125, an unknown string) must make ``fit``
        raise ``ValueError`` with the corresponding message.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=10, max_features="auto", max_depth=3, random_state=0)
        model.fit(self.x, self.y)

        # With "auto" the fitted value must equal the number of columns.
        self.assertEqual(model.max_features_, self.x.shape[1])

        model.set_params(max_features="sqrt")
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_, int(numpy.sqrt(self.x.shape[1])))

        model.set_params(max_features="log2")
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_, int(numpy.log2(self.x.shape[1])))

        model.set_params(max_features=0.25)
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_, int(0.25 * self.x.shape[1]))

        model.set_params(max_features=5)
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_, 5)

        # The expected messages are regular expressions. Raw strings are used
        # so that the regex escapes ``\(`` and ``\]`` are not parsed as
        # (invalid) string escape sequences, which Python warns about.
        model.set_params(max_features=-1)
        self.assertRaisesRegex(ValueError,
                               r"max_features must be in \(0, n_features\]",
                               model.fit, self.x, self.y)

        model.set_params(max_features=-1.125)
        self.assertRaisesRegex(ValueError,
                               r"max_features must be in \(0, 1.0\]",
                               model.fit, self.x, self.y)

        model.set_params(max_features="fail_me")
        self.assertRaisesRegex(ValueError,
                               "Invalid value for max_features: 'fail_me'. "
                               "Allowed string values are 'auto', 'sqrt' "
                               "or 'log2'",
                               model.fit, self.x, self.y)