コード例 #1
0
    def test_squared_loss_staged_predict(self):
        """Check that the last stage of ``staged_predict`` equals ``predict``.

        The comparison runs twice on the same estimator: once as constructed
        and once more after refitting with ``dropout_rate=0.03``.
        """
        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)

        def refit_and_compare():
            estimator.fit(self.x, self.y)
            final = estimator.predict(self.x)

            # Every stage must have the shape of the final prediction ...
            for staged in estimator.staged_predict(self.x):
                self.assertTupleEqual(staged.shape, final.shape)

            # ... but only the last yielded stage is compared value-by-value.
            assert_array_equal(final, staged)

        refit_and_compare()

        estimator.set_params(dropout_rate=0.03)
        refit_and_compare()
コード例 #2
0
    def test_fit_verbose(make_whas500):
        """Fit a small model with ``verbose=1``; the test passes if ``fit`` does not raise."""
        data = make_whas500(with_std=False, to_numeric=True)

        settings = {"n_estimators": 10, "verbose": 1, "random_state": 0}
        estimator = GradientBoostingSurvivalAnalysis(**settings)
        estimator.fit(data.x, data.y)
コード例 #3
0
    def test_squared_loss_staged_predict(make_whas500):
        """Check that the final ``staged_predict`` output equals ``predict``.

        Runs once with the estimator as constructed and once more after
        refitting with ``dropout_rate=0.03``.
        """
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)

        def refit_and_compare():
            estimator.fit(data.x, data.y)
            final = estimator.predict(data.x)

            # Each stage only has its shape checked inside the loop.
            for staged in estimator.staged_predict(data.x):
                assert staged.shape == final.shape

            # After the loop ``staged`` holds the last stage, which must
            # equal the regular prediction.
            assert_array_equal(final, staged)

        refit_and_compare()

        estimator.set_params(dropout_rate=0.03)
        refit_and_compare()
コード例 #4
0
    def test_fit_subsample(self):
        """Fit with ``subsample=0.6`` and check fitted attributes and concordance.

        The model is trained on all of ``self.x``.  Predictions are then made
        for every row except seven masked-out indices and compared against
        stored concordance-index reference values.  Finally, predicting with
        only two feature columns must raise ``ValueError``.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=100,
                                                 max_features=8,
                                                 subsample=0.6,
                                                 random_state=0)
        model.fit(self.x, self.y)

        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        # no longer exists on ``unittest.TestCase`` as of Python 3.12.
        self.assertEqual(model.max_features_, 8)
        self.assertTrue(hasattr(model, "oob_improvement_"))

        # Keep all rows except the listed indices for the evaluation.
        incl_mask = numpy.ones(self.x.shape[0], dtype=bool)
        incl_mask[[35, 111, 174, 206, 236, 268, 497]] = False
        x_test = self.x[incl_mask]
        y_test = self.y[incl_mask]

        p = model.predict(x_test)

        expected_cindex = numpy.array([0.8592640, 62905, 10303, 0, 110])
        result = concordance_index_censored(y_test['fstat'], y_test['lenfol'],
                                            p)
        assert_array_almost_equal(expected_cindex, numpy.array(result))

        # One entry per boosting iteration (``n_estimators=100``).
        self.assertTupleEqual((100, ), model.train_score_.shape)
        self.assertTupleEqual((100, ), model.oob_improvement_.shape)

        self.assertRaisesRegex(
            ValueError,
            "Number of features of the model must match the input. "
            "Model n_features is 14 and input n_features is 2 ", model.predict,
            self.x[:, :2])
コード例 #5
0
    def test_presort(make_whas500):
        """``fit`` must reject ``presort=None`` with a ``ValueError``."""
        data = make_whas500(with_std=False, to_numeric=True)
        estimator = GradientBoostingSurvivalAnalysis(n_estimators=10, presort=None, random_state=0)

        expected_message = r"'presort' should be in \('auto', True, False\). Got None instead."
        with pytest.raises(ValueError, match=expected_message):
            estimator.fit(data.x, data.y)
コード例 #6
0
    def test_squared_loss(make_whas500):
        """Fit with ``loss="squared"`` and compare RMSE and score to reference values.

        Also verifies that both ``predict_survival_function`` and
        ``predict_cumulative_hazard_function`` raise ``ValueError`` for a
        model fitted with this loss.
        """
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)
        estimator.fit(data.x, data.y)

        predicted = estimator.predict(data.x)
        observed = data.y["lenfol"]
        selector = data.y["fstat"]

        # RMSE over all samples.
        overall_rmse = numpy.sqrt(mean_squared_error(observed, predicted))
        assert round(abs(overall_rmse - 580.23345259002951), 7) == 0

        # RMSE restricted to the rows selected by the ``fstat`` field.
        selected_rmse = numpy.sqrt(
            mean_squared_error(observed[selector], predicted[selector]))
        assert round(abs(selected_rmse - 383.10639243317951), 7) == 0

        score = estimator.score(data.x, data.y)
        assert round(abs(score - 0.9021810004), 7) == 0

        coxph_only = "`fit` must be called with the loss option set to 'coxph'"
        for method_name in ("predict_survival_function",
                            "predict_cumulative_hazard_function"):
            with pytest.raises(ValueError, match=coxph_only):
                getattr(estimator, method_name)(data.x)
コード例 #7
0
    def test_fit(make_whas500):
        """Fit with default subsampling and check attributes, concordance and input validation."""
        data = make_whas500(with_std=False, to_numeric=True)

        hyper_params = dict(n_estimators=100, max_depth=3,
                            min_samples_split=10, random_state=0)
        estimator = GradientBoostingSurvivalAnalysis(**hyper_params)
        estimator.fit(data.x, data.y)

        assert estimator.max_features_ == 14
        assert not hasattr(estimator, "oob_improvement_")

        prediction = estimator.predict(data.x)

        reference = (0.86272605091218779, 64826, 10309, 14, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'],
                                   prediction, reference)

        assert estimator.train_score_.shape == (100, )

        # Predicting with two of the columns must be rejected.
        mismatch_message = ("Number of features of the model must match the input. "
                            "Model n_features is 14 and input n_features is 2 ")
        with pytest.raises(ValueError, match=mismatch_message):
            estimator.predict(data.x[:, :2])
コード例 #8
0
    def test_fit_subsample(make_whas500):
        """Fit with ``subsample=0.6`` and check attributes, concordance and input validation."""
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            n_estimators=50, max_features=8, subsample=0.6,
            presort=False, random_state=0)
        estimator.fit(data.x, data.y)

        assert estimator.max_features_ == 8
        assert hasattr(estimator, "oob_improvement_")

        # Evaluate on every row except the listed indices.
        keep = numpy.ones(data.x.shape[0], dtype=bool)
        keep[[35, 111, 174, 206, 236, 268, 497]] = False
        kept_x = data.x[keep]
        kept_y = data.y[keep]

        prediction = estimator.predict(kept_x)

        reference = (0.8330510326740247, 60985, 12221, 2, 110)
        assert_cindex_almost_equal(kept_y['fstat'], kept_y['lenfol'], prediction, reference)

        # One entry per boosting iteration (``n_estimators=50``).
        for fitted_attr in ("train_score_", "oob_improvement_"):
            assert getattr(estimator, fitted_attr).shape == (50,)

        mismatch_message = ("Number of features of the model must match the input. "
                            "Model n_features is 14 and input n_features is 2 ")
        with pytest.raises(ValueError, match=mismatch_message):
            estimator.predict(data.x[:, :2])
コード例 #9
0
    def test_fit_int_param_as_float(make_whas500):
        """Integer hyper-parameters passed as floats must compare equal to their int values.

        The tree depth used depends on ``_sklearn_version_under_0p21``.
        """
        data = make_whas500(with_std=False, to_numeric=True)

        # Account for https://github.com/scikit-learn/scikit-learn/pull/12344
        depth = 3 if _sklearn_version_under_0p21 else 4

        estimator = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0, max_depth=float(depth),
            min_samples_split=10.0, random_state=0)

        params = estimator.get_params()
        expected_params = {"n_estimators": 100, "max_depth": depth, "min_samples_split": 10}
        for name, value in expected_params.items():
            assert value == params[name]

        estimator.set_params(max_leaf_nodes=15.0)
        assert 15 == estimator.get_params()["max_leaf_nodes"]

        estimator.fit(data.x, data.y)
        prediction = estimator.predict(data.x)

        reference = (0.90256690042449006, 67826, 7321, 2, 14)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'],
                                   prediction, reference)
コード例 #10
0
    def test_fit(self):
        """Fit without subsampling and check attributes, concordance and input validation.

        Predictions on the training data are compared against stored
        concordance-index reference values, and predicting with only two
        feature columns must raise ``ValueError``.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=100,
                                                 max_depth=3,
                                                 min_samples_split=10,
                                                 random_state=0)
        model.fit(self.x, self.y)

        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        # no longer exists on ``unittest.TestCase`` as of Python 3.12.
        self.assertEqual(model.max_features_, 14)
        self.assertFalse(hasattr(model, "oob_improvement_"))

        p = model.predict(self.x)

        expected_cindex = numpy.array(
            [0.86272605091218779, 64826, 10309, 14, 119])
        result = concordance_index_censored(self.y['fstat'], self.y['lenfol'],
                                            p)
        assert_array_almost_equal(expected_cindex, numpy.array(result))

        # One entry per boosting iteration (``n_estimators=100``).
        self.assertTupleEqual((100, ), model.train_score_.shape)

        self.assertRaisesRegex(
            ValueError,
            "Number of features of the model must match the input. "
            "Model n_features is 14 and input n_features is 2 ", model.predict,
            self.x[:, :2])
コード例 #11
0
    def test_presort(make_whas500, presort):
        """Fitting with the given ``presort`` value must emit a deprecation warning."""
        data = make_whas500(with_std=False, to_numeric=True)
        estimator = GradientBoostingSurvivalAnalysis(n_estimators=10, presort=presort, random_state=0)

        deprecation_message = "The parameter 'presort' is deprecated "
        with pytest.deprecated_call(match=deprecation_message):
            estimator.fit(data.x, data.y)
コード例 #12
0
 def test_presort(whas500_sparse_data):
     """``fit`` must raise ``ValueError`` for ``presort=True`` with the sparse input."""
     estimator = GradientBoostingSurvivalAnalysis(n_estimators=10, presort=True, random_state=0)

     unsupported_message = "Presorting is not supported for sparse matrices."
     with pytest.raises(ValueError, match=unsupported_message):
         estimator.fit(whas500_sparse_data.x_sparse, whas500_sparse_data.y)
コード例 #13
0
    def test_negative_ccp_alpha(make_whas500):
        """``fit`` must raise ``ValueError`` when ``ccp_alpha`` is negative."""
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        clf = GradientBoostingSurvivalAnalysis()
        msg = "ccp_alpha must be greater than or equal to 0"

        # Set the invalid value outside the ``raises`` block so that only
        # ``fit`` can satisfy the expectation; with both calls inside, the
        # test could not tell which of them raised.
        clf.set_params(ccp_alpha=-1.0)
        with pytest.raises(ValueError, match=msg):
            clf.fit(whas500_data.x, whas500_data.y)
コード例 #14
0
def fit_and_score_features(X, y):
    """Score every column of ``X`` individually with a gradient-boosting survival model.

    A single estimator instance (``verbose=True``, ``n_estimators=500``) is
    refit once per column on that column alone, kept two-dimensional via
    slicing, and the value of ``score`` on the same data is recorded.

    Parameters
    ----------
    X : two-dimensional array supporting ``X[:, a:b]`` slicing
        Feature matrix; ``X.shape[1]`` determines the number of scores.
    y : object
        Target passed unchanged to ``fit`` and ``score``.

    Returns
    -------
    numpy.ndarray
        One score per column of ``X``, in column order.
    """
    column_count = X.shape[1]
    per_column = np.empty(column_count)
    estimator = GradientBoostingSurvivalAnalysis(verbose=True, n_estimators=500)

    def score_single(index):
        # ``index:index + 1`` keeps the column axis so the input stays 2-D.
        column = X[:, index:index + 1]
        estimator.fit(column, y)
        return estimator.score(column, y)

    for index in range(column_count):
        per_column[index] = score_single(index)
    return per_column
コード例 #15
0
    def test_monitor_early_stopping(self):
        """Fit with ``early_stopping_monitor`` and check that only 10 stages were kept."""
        settings = dict(loss="ipcwls", n_estimators=50, max_depth=1,
                        subsample=0.5, random_state=0)
        estimator = GradientBoostingSurvivalAnalysis(**settings)
        estimator.fit(self.x, self.y, monitor=early_stopping_monitor)

        # The configured number of estimators is not altered by the monitor.
        self.assertEqual(estimator.n_estimators, 50)

        # All per-stage attributes are expected to have length 10.
        for fitted_attr in ("estimators_", "train_score_", "oob_improvement_"):
            self.assertEqual(getattr(estimator, fitted_attr).shape[0], 10)
コード例 #16
0
    def test_monitor_early_stopping(make_whas500):
        """Fit with ``early_stopping_monitor`` and check that only 10 stages were kept."""
        data = make_whas500(with_std=False, to_numeric=True)

        settings = dict(loss="ipcwls", n_estimators=50, max_depth=1,
                        subsample=0.5, random_state=0)
        estimator = GradientBoostingSurvivalAnalysis(**settings)
        estimator.fit(data.x, data.y, monitor=early_stopping_monitor)

        # The configured number of estimators is not altered by the monitor.
        assert estimator.n_estimators == 50

        # All per-stage attributes are expected to have length 10.
        for fitted_attr in ("estimators_", "train_score_", "oob_improvement_"):
            assert getattr(estimator, fitted_attr).shape[0] == 10
コード例 #17
0
    def test_ipcwls_loss(self):
        """Fit with ``loss="ipcwls"`` and compare RMSE values to stored references."""
        estimator = GradientBoostingSurvivalAnalysis(
            loss="ipcwls", n_estimators=100, max_depth=3, random_state=0)
        estimator.fit(self.x, self.y)

        predicted = estimator.predict(self.x)
        observed = self.y["lenfol"]
        selector = self.y["fstat"]

        # RMSE over all samples.
        overall_rmse = numpy.sqrt(mean_squared_error(observed, predicted))
        self.assertAlmostEqual(overall_rmse, 590.5441693629117)

        # RMSE restricted to the rows selected by the ``fstat`` field.
        selected_rmse = numpy.sqrt(
            mean_squared_error(observed[selector], predicted[selector]))
        self.assertAlmostEqual(selected_rmse, 392.97741487479743)
コード例 #18
0
    def test_squared_loss(self):
        """Fit with ``loss="squared"`` and compare RMSE values to stored references."""
        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)
        estimator.fit(self.x, self.y)

        predicted = estimator.predict(self.x)
        observed = self.y["lenfol"]
        selector = self.y["fstat"]

        # RMSE over all samples.
        overall_rmse = numpy.sqrt(mean_squared_error(observed, predicted))
        self.assertAlmostEqual(overall_rmse, 580.23345259002951)

        # RMSE restricted to the rows selected by the ``fstat`` field.
        selected_rmse = numpy.sqrt(
            mean_squared_error(observed[selector], predicted[selector]))
        self.assertAlmostEqual(selected_rmse, 383.10639243317951)
コード例 #19
0
    def test_fit_dropout(self):
        """Fit with ``dropout_rate=0.03`` and check attributes and concordance.

        Predictions on the training data are compared against stored
        concordance-index reference values.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=100, max_features=8,
                                                 learning_rate=1.0, dropout_rate=0.03,
                                                 random_state=0)
        model.fit(self.x, self.y)

        self.assertFalse(hasattr(model, "oob_improvement_"))
        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        # no longer exists on ``unittest.TestCase`` as of Python 3.12.
        self.assertEqual(model.max_features_, 8)

        p = model.predict(self.x)

        expected_cindex = numpy.array([0.9094333, 68343, 6806, 0, 119])
        result = concordance_index_censored(self.y['fstat'], self.y['lenfol'], p)
        assert_array_almost_equal(expected_cindex, numpy.array(result))
コード例 #20
0
    def test_dropout(whas500_sparse_data, loss):
        """Fitting on sparse and on dense input must give almost equal predictions with dropout."""
        data = whas500_sparse_data
        estimator = GradientBoostingSurvivalAnalysis(
            loss=loss, n_estimators=100, max_depth=1, min_samples_split=10,
            dropout_rate=0.03, random_state=0)

        # First fit: sparse input.
        estimator.fit(data.x_sparse, data.y)
        assert estimator.estimators_.shape[0] == 100
        assert estimator.train_score_.shape == (100,)
        after_sparse_fit = estimator.predict(data.x_dense)

        # Second fit: dense input, predictions made on the same dense data.
        estimator.fit(data.x_dense, data.y)
        after_dense_fit = estimator.predict(data.x_dense)

        assert_array_almost_equal(after_sparse_fit, after_dense_fit)
コード例 #21
0
    def test_fit_dropout(make_whas500):
        """Fit with ``dropout_rate=0.03`` and check attributes and concordance."""
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            n_estimators=100, max_features=8, learning_rate=1.0,
            dropout_rate=0.03, random_state=0)
        estimator.fit(data.x, data.y)

        assert not hasattr(estimator, "oob_improvement_")
        assert estimator.max_features_ == 8

        prediction = estimator.predict(data.x)

        reference = (0.9094333, 68343, 6806, 0, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], prediction, reference)
コード例 #22
0
    def test_squared_loss(make_whas500):
        """Fit with ``loss="squared"`` and compare RMSE values to stored references."""
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            loss="squared", n_estimators=100, max_depth=3, random_state=0)
        estimator.fit(data.x, data.y)

        predicted = estimator.predict(data.x)
        observed = data.y["lenfol"]
        selector = data.y["fstat"]

        # RMSE over all samples.
        overall_rmse = numpy.sqrt(mean_squared_error(observed, predicted))
        assert round(abs(overall_rmse - 580.23345259002951), 7) == 0

        # RMSE restricted to the rows selected by the ``fstat`` field.
        selected_rmse = numpy.sqrt(
            mean_squared_error(observed[selector], predicted[selector]))
        assert round(abs(selected_rmse - 383.10639243317951), 7) == 0
コード例 #23
0
    def test_ipcwls_loss(make_whas500):
        """Fit with ``loss="ipcwls"`` and compare RMSE values to stored references."""
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            loss="ipcwls", n_estimators=100, max_depth=3, random_state=0)
        estimator.fit(data.x, data.y)

        predicted = estimator.predict(data.x)
        observed = data.y["lenfol"]
        selector = data.y["fstat"]

        # RMSE over all samples.
        overall_rmse = numpy.sqrt(mean_squared_error(observed, predicted))
        assert round(abs(overall_rmse - 590.5441693629117), 7) == 0

        # RMSE restricted to the rows selected by the ``fstat`` field.
        selected_rmse = numpy.sqrt(
            mean_squared_error(observed[selector], predicted[selector]))
        assert round(abs(selected_rmse - 392.97741487479743), 7) == 0
コード例 #24
0
    def test_dropout(self):
        """For each loss, sparse and dense fits with dropout must predict almost equally."""

        def compare_sparse_and_dense(loss_name):
            estimator = GradientBoostingSurvivalAnalysis(
                loss=loss_name, n_estimators=100, max_depth=1,
                min_samples_split=10, dropout_rate=0.03, random_state=0)

            # First fit: sparse input.
            estimator.fit(self.x_sparse, self.y)
            self.assertEqual(estimator.estimators_.shape[0], 100)
            self.assertTupleEqual(estimator.train_score_.shape, (100,))
            after_sparse_fit = estimator.predict(self.x_dense)

            # Second fit: dense input, predictions made on the same dense data.
            estimator.fit(self.x_dense, self.y)
            after_dense_fit = estimator.predict(self.x_dense)

            assert_array_almost_equal(after_sparse_fit, after_dense_fit)

        for loss_name in ('coxph', 'squared', 'ipcwls'):
            compare_sparse_and_dense(loss_name)
コード例 #25
0
    def test_fit_int_param_as_float(self):
        """Integer hyper-parameters passed as floats must compare equal to their int values."""
        estimator = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0, max_depth=3.0, min_samples_split=10.0,
            random_state=0)

        params = estimator.get_params()
        expected_params = (("n_estimators", 100), ("max_depth", 3), ("min_samples_split", 10))
        for name, value in expected_params:
            self.assertEqual(value, params[name])

        estimator.set_params(max_leaf_nodes=15.0)
        self.assertEqual(15, estimator.get_params()["max_leaf_nodes"])

        estimator.fit(self.x, self.y)
        prediction = estimator.predict(self.x)

        reference = numpy.array([0.90256690042449006, 67826, 7321, 2, 119])
        observed = concordance_index_censored(self.y['fstat'], self.y['lenfol'], prediction)
        assert_array_almost_equal(reference, numpy.array(observed))
コード例 #26
0
    def test_fit_int_param_as_float(make_whas500):
        """Integer hyper-parameters passed as floats must compare equal to their int values."""
        data = make_whas500(with_std=False, to_numeric=True)

        estimator = GradientBoostingSurvivalAnalysis(
            n_estimators=100.0, max_depth=3.0, min_samples_split=10.0,
            random_state=0)

        params = estimator.get_params()
        expected_params = (("n_estimators", 100), ("max_depth", 3), ("min_samples_split", 10))
        for name, value in expected_params:
            assert value == params[name]

        estimator.set_params(max_leaf_nodes=15.0)
        assert 15 == estimator.get_params()["max_leaf_nodes"]

        estimator.fit(data.x, data.y)
        prediction = estimator.predict(data.x)

        reference = (0.90256690042449006, 67826, 7321, 2, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], prediction, reference)
コード例 #27
0
    def test_predict_function(make_whas500, fn, expected_file):
        """Compare the output of the estimator method named ``fn`` with stored values.

        The model is fitted on all rows except the first ten; the method is
        then called on those first ten rows.  Each returned callable is
        evaluated at the unique ``lenfol`` values of the training rows
        selected by ``fstat``, and the stacked results are compared with the
        contents of ``expected_file``.

        Parameters
        ----------
        make_whas500 : callable
            Factory called with ``with_std=False, to_numeric=True``; its
            result provides the ``x`` and ``y`` attributes used here.
        fn : str
            Name of the method looked up on the fitted model via ``getattr``.
        expected_file : str or path-like
            Comma-delimited text file read with ``numpy.loadtxt``.
        """
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        model = GradientBoostingSurvivalAnalysis(n_estimators=100,
                                                 max_depth=2,
                                                 random_state=0)
        train_x, train_y = whas500_data.x[10:], whas500_data.y[10:]
        model.fit(train_x, train_y)

        test_x = whas500_data.x[:10]
        surv_fn = getattr(model, fn)(test_x)

        times = numpy.unique(train_y["lenfol"][train_y["fstat"]])
        # ``numpy.vstack`` replaces ``numpy.row_stack``, a deprecated alias of
        # the same function that emits a DeprecationWarning under NumPy 2.0.
        actual = numpy.vstack([fn_gb(times) for fn_gb in surv_fn])

        expected = numpy.loadtxt(expected_file, delimiter=",")

        assert_array_almost_equal(actual, expected)
コード例 #28
0
    def test_ccp_alpha(make_whas500):
        """A model fitted with ``ccp_alpha=10.0`` must have a smaller first tree than one without."""
        data = make_whas500(with_std=False, to_numeric=True)

        # Settings shared by both estimators; only ``ccp_alpha`` differs.
        shared = dict(n_estimators=10, max_leaf_nodes=20, random_state=1)

        unpruned = GradientBoostingSurvivalAnalysis(**shared)
        unpruned.fit(data.x, data.y)

        pruned = GradientBoostingSurvivalAnalysis(ccp_alpha=10.0, **shared)
        pruned.fit(data.x, data.y)

        full_tree = unpruned.estimators_[0, 0].tree_
        pruned_tree = pruned.estimators_[0, 0].tree_
        assert full_tree.node_count > pruned_tree.node_count
        assert full_tree.max_depth > pruned_tree.max_depth
コード例 #29
0
    def test_max_features(self):
        """Check how ``max_features`` settings translate into ``max_features_``.

        Valid settings ("auto", "sqrt", "log2", a fraction, an integer) are
        each refit and compared against the value computed from the number of
        columns of ``self.x``.  Invalid settings (negative integer, negative
        float, unknown string) must make ``fit`` raise ``ValueError``.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=10,
                                                 max_features="auto",
                                                 max_depth=3,
                                                 random_state=0)
        model.fit(self.x, self.y)

        self.assertEqual(model.max_features_, self.x.shape[1])

        model.set_params(max_features="sqrt")
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_,
                               int(numpy.sqrt(self.x.shape[1])))

        model.set_params(max_features="log2")
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_,
                               int(numpy.log2(self.x.shape[1])))

        model.set_params(max_features=0.25)
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_,
                               int(0.25 * self.x.shape[1]))

        model.set_params(max_features=5)
        model.fit(self.x, self.y)
        self.assertAlmostEqual(model.max_features_, 5)

        # The patterns below are raw strings: ``\(`` and ``\]`` are regex
        # escapes, and in a normal string literal they are invalid escape
        # sequences that trigger a warning on recent Python versions.  The
        # runtime value of each pattern is unchanged.
        model.set_params(max_features=-1)
        self.assertRaisesRegex(ValueError,
                               r"max_features must be in \(0, n_features\]",
                               model.fit, self.x, self.y)

        model.set_params(max_features=-1.125)
        self.assertRaisesRegex(ValueError,
                               r"max_features must be in \(0, 1.0\]", model.fit,
                               self.x, self.y)

        model.set_params(max_features="fail_me")
        self.assertRaisesRegex(
            ValueError, "Invalid value for max_features: 'fail_me'. "
            "Allowed string values are 'auto', 'sqrt' "
            "or 'log2'", model.fit, self.x, self.y)
コード例 #30
0
    def test_fit_subsample(self):
        """Fit with ``subsample=0.6`` and check attributes, concordance and input validation.

        Predictions on the training data are compared against stored
        concordance-index reference values, and predicting with only two
        feature columns must raise ``ValueError``.
        """
        model = GradientBoostingSurvivalAnalysis(n_estimators=100, max_features=8, subsample=0.6,
                                                 random_state=0)
        model.fit(self.x, self.y)

        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias, which
        # no longer exists on ``unittest.TestCase`` as of Python 3.12.
        self.assertEqual(model.max_features_, 8)
        self.assertTrue(hasattr(model, "oob_improvement_"))

        p = model.predict(self.x)

        expected_cindex = numpy.array([0.8610760, 64709, 10440, 0, 119])
        result = concordance_index_censored(self.y['fstat'], self.y['lenfol'], p)
        assert_array_almost_equal(expected_cindex, numpy.array(result))

        # One entry per boosting iteration (``n_estimators=100``).
        self.assertTupleEqual((100,), model.train_score_.shape)
        self.assertTupleEqual((100,), model.oob_improvement_.shape)

        self.assertRaisesRegex(ValueError, "Number of features of the model must match the input. "
                                           "Model n_features is 14 and input n_features is 2 ",
                               model.predict, self.x[:, :2])