Esempio n. 1
0
def _get_dmatrix(data: RayDMatrix, param: Dict) -> xgb.DMatrix:
    """Build the xgboost matrix for ``data`` from the keyword dict ``param``.

    When ``param["data"]`` is a list it is treated as several pieces:

    * for a ``RayDeviceQuantileDMatrix`` the pieces are wrapped in a
      ``RayDataIter`` that is handed to ``xgb.DeviceQuantileDMatrix``;
    * otherwise every data/label/weight/margin/bound field is merged with
      ``concat_dataframes`` before ``xgb.DMatrix`` is created.

    Note that ``param`` is modified in place: it is updated with the merged
    values, and in the non-device path the label bounds are popped from it.

    Returns:
        The constructed ``xgb.DeviceQuantileDMatrix`` or ``xgb.DMatrix``.
    """
    if isinstance(data, RayDeviceQuantileDMatrix):
        if isinstance(param["data"], list):
            dm_param = {
                "feature_names": data.feature_names,
                "feature_types": data.feature_types,
                "missing": data.missing,
            }
            param.update(dm_param)
            it = RayDataIter(**param)
            matrix = xgb.DeviceQuantileDMatrix(it, **dm_param)
        else:
            matrix = xgb.DeviceQuantileDMatrix(**param)
    else:
        if isinstance(param["data"], list):
            sharded_fields = (
                "data",
                "label",
                "weight",
                "base_margin",
                "label_lower_bound",
                "label_upper_bound",
            )
            # Build the merged values first, then update ``param`` in one go.
            param.update({
                field: concat_dataframes(param[field])
                for field in sharded_fields
            })

        # The bounds are not passed to the constructor; they are attached
        # afterwards through ``set_info``.
        ll = param.pop("label_lower_bound", None)
        lu = param.pop("label_upper_bound", None)

        matrix = xgb.DMatrix(**param)
        matrix.set_info(label_lower_bound=ll, label_upper_bound=lu)
    return matrix
Esempio n. 2
0
 def test_dlpack_device_dmat(self):
     """Slicing a DeviceQuantileDMatrix built via ``toDlpack()`` must raise."""
     import cupy as cp
     num_rows = 100
     features = cp.random.random((num_rows, 2))
     dlpack_data = features.toDlpack()
     dmat = xgb.DeviceQuantileDMatrix(dlpack_data)
     row_ids = [0, 1, 2]
     with pytest.raises(xgb.core.XGBoostError):
         dmat.slice(rindex=row_ids)
Esempio n. 3
0
    def run_invalid_category(self, tree_method: str) -> None:
        """Expect ``ValueError`` for invalid values in categorical features.

        Three inputs are tried with all ten features typed ``"c"``: a value
        just above the int32 maximum, the value 16777216, and normally
        distributed floats (mixed positive and negative). For ``gpu_hist``
        the last input is additionally fed to ``DeviceQuantileDMatrix``.
        """
        rng = np.random.default_rng()

        def expect_training_error(features, labels) -> None:
            # Check is performed during sketching.
            dmat = xgb.DMatrix(features, labels, feature_types=["c"] * 10)
            with pytest.raises(ValueError):
                xgb.train({"tree_method": tree_method}, dmat)

        # too large
        X = rng.integers(low=0, high=4, size=1000).reshape(100, 10)
        y = rng.normal(loc=0, scale=1, size=100)
        X[13, 7] = np.iinfo(np.int32).max + 1
        expect_training_error(X, y)

        X[13, 7] = 16777216
        expect_training_error(X, y)

        # mixed positive and negative values
        X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)
        y = rng.normal(loc=0, scale=1, size=100)
        expect_training_error(X, y)

        if tree_method != "gpu_hist":
            return

        import cupy as cp

        X, y = cp.array(X), cp.array(y)
        with pytest.raises(ValueError):
            xgb.DeviceQuantileDMatrix(X, y, feature_types=["c"] * 10)
Esempio n. 4
0
    def test_cudf_categorical(self):
        """Check categorical cudf input for DMatrix and DeviceQuantileDMatrix.

        Covers the reported feature types, a categorical column containing a
        missing value, and the ``ValueError`` raised when categorical data is
        passed without ``enable_categorical=True``.
        """
        import cudf
        _X, _y = tm.make_categorical(100, 30, 17, False)
        X = cudf.from_pandas(_X)
        y = cudf.from_pandas(_y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "c" for t in Xy.feature_types)

        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert len(Xy.feature_types) == X.shape[1]
        assert all(t == "c" for t in Xy.feature_types)

        # test missing value
        # ``np.nan`` is used because the ``np.NaN`` alias was removed in
        # NumPy 2.0.
        X = cudf.DataFrame({"f0": ["a", "b", np.nan]})
        X["f0"] = X["f0"].astype("category")
        _, cat_codes, _, _ = xgb.data._transform_cudf_df(
            X, None, None, enable_categorical=True)
        for col in cat_codes:
            assert col.has_nulls

        y = [0, 1, 2]
        # Categorical data without ``enable_categorical`` must be rejected.
        with pytest.raises(ValueError):
            xgb.DMatrix(X, y)
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1

        with pytest.raises(ValueError):
            xgb.DeviceQuantileDMatrix(X, y)

        Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1

        # Same checks with the single column selected via ``X["f0"]``.
        X = X["f0"]
        with pytest.raises(ValueError):
            xgb.DMatrix(X, y)

        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        assert Xy.num_row() == 3
        assert Xy.num_col() == 1
Esempio n. 5
0
def main():
    """Build matrices from batch iterators and train on the file-backed one."""
    # Better suited to GPU training.
    train_frame = train.loc[train_idx]
    Xy_train = IterLoadForDMatrix(train_frame, FEATURES, "target")
    dtrain = xgb.DeviceQuantileDMatrix(Xy_train, max_bin=256)

    # Better suited to CPU training.
    svm_files = ["file_0.svm", "file_1.svm", "file_2.svm"]
    file_iter = Iterator(svm_files)
    Xy = xgb.DMatrix(file_iter)

    # Other tree methods including ``hist`` and ``gpu_hist`` also work,
    # but have some caveats as noted in the following sections.
    train_params = {"tree_method": "approx"}
    booster = xgb.train(train_params, Xy)
Esempio n. 6
0
def test_large_input():
    """Train on a device matrix holding more than 2**31 elements.

    Skipped when the device reports less than the required memory.
    """
    free_bytes, _ = cp.cuda.runtime.memGetInfo()
    # 15 GB
    needed_bytes = 1.5e+10
    if free_bytes < needed_bytes:
        pytest.skip("Not enough memory on this device")

    n_cols = 1000
    # Ceiling division of 2**31 by the column count.
    n_rows = ((1 << 31) + n_cols - 1) // n_cols
    assert np.log2(n_rows * n_cols) > 31

    features = cp.ones((n_rows, n_cols), dtype=np.float32)
    labels = cp.ones(n_rows)
    dmat = xgb.DeviceQuantileDMatrix(features, labels)
    train_params = {"tree_method": "gpu_hist", "max_depth": 1}
    xgb.train(train_params, dmat, 1)
Esempio n. 7
0
    def test_categorical(self):
        """Both matrix types must report every cudf feature as "categorical"."""
        import cudf
        pandas_X, pandas_y = tm.make_categorical(100, 30, 17, False)
        X = cudf.from_pandas(pandas_X)
        y = cudf.from_pandas(pandas_y)

        def check_feature_types(matrix) -> None:
            types = matrix.feature_types
            assert len(types) == X.shape[1]
            assert all(t == "categorical" for t in types)

        check_feature_types(xgb.DMatrix(X, y, enable_categorical=True))
        check_feature_types(
            xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True))
Esempio n. 8
0
    def test_invalid_categorical(self):
        """Normally distributed floats typed as categorical must raise."""
        import cupy as cp
        rng = np.random.default_rng()
        host_X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)
        host_y = rng.normal(loc=0, scale=1, size=100)

        # Check is performed during sketching.
        host_dmat = xgb.DMatrix(host_X, host_y, feature_types=["c"] * 10)
        with pytest.raises(ValueError):
            xgb.train({"tree_method": "gpu_hist"}, host_dmat)

        # Same data as cupy arrays: here constructing the matrix must raise.
        dev_X = cp.array(host_X)
        dev_y = cp.array(host_y)
        with pytest.raises(ValueError):
            xgb.DeviceQuantileDMatrix(dev_X, dev_y, feature_types=["c"] * 10)
Esempio n. 9
0
    def test_metainfo(self) -> None:
        """Labels and feature weights must round-trip through the matrix."""
        import cupy as cp
        rng = cp.random.RandomState(1994)

        n_rows, n_cols = 10, 3
        features = rng.randn(n_rows, n_cols)
        expected_labels = rng.randn(n_rows)

        # Shift so the smallest weight is zero.
        # NOTE(review): the weights are sized by rows rather than columns --
        # confirm this is intended.
        expected_fw = rng.randn(n_rows)
        expected_fw -= expected_fw.min()

        dmat = xgb.DeviceQuantileDMatrix(
            data=features, label=expected_labels, feature_weights=expected_fw
        )

        actual_fw = dmat.get_float_info("feature_weights")
        actual_labels = dmat.get_label()

        cp.testing.assert_allclose(expected_fw, actual_fw)
        cp.testing.assert_allclose(expected_labels, actual_labels)
Esempio n. 10
0
def test_from_cudf_iter():
    """Iterator-built and array-built matrices must give equal predictions."""
    rounds = 100
    train_params = {'tree_method': 'gpu_hist'}
    batches = IterForDMatrixTest()

    # Matrix built from the iterator.
    iter_dmat = xgb.DeviceQuantileDMatrix(batches)
    iter_booster = xgb.train(train_params, iter_dmat, num_boost_round=rounds)
    iter_predictions = iter_booster.predict(iter_dmat)

    # Matrix built from the iterator's array views.
    array_dmat = xgb.DMatrix(batches.as_array(), batches.as_array_labels())

    assert iter_dmat.num_col() == array_dmat.num_col()
    assert iter_dmat.num_row() == array_dmat.num_row()

    array_booster = xgb.train(
        {'tree_method': 'gpu_hist'}, array_dmat, num_boost_round=rounds
    )
    array_predictions = array_booster.predict(array_dmat)

    np.testing.assert_allclose(iter_predictions, array_predictions)
Esempio n. 11
0
def test_from_cudf_iter(enable_categorical):
    """Iterator-built and array-built matrices must give equal predictions.

    ``enable_categorical`` is forwarded to the iterator and to both matrix
    constructors.
    """
    rounds = 100
    batches = IterForDMatrixTest(enable_categorical)
    params = {"tree_method": "gpu_hist"}

    # Matrix built from the iterator.
    iter_dmat = xgb.DeviceQuantileDMatrix(
        batches, enable_categorical=enable_categorical
    )
    iter_booster = xgb.train(params, iter_dmat, num_boost_round=rounds)

    # Matrix built from the iterator's array views.
    features = batches.as_array()
    labels = batches.as_array_labels()
    array_dmat = xgb.DMatrix(
        features, labels, enable_categorical=enable_categorical
    )

    assert iter_dmat.num_col() == array_dmat.num_col()
    assert iter_dmat.num_row() == array_dmat.num_row()

    array_booster = xgb.train(params, array_dmat, num_boost_round=rounds)

    array_predictions = array_booster.predict(array_dmat)
    iter_predictions = iter_booster.predict(iter_dmat)
    np.testing.assert_allclose(iter_predictions, array_predictions)
def main():
    """Compare training on an iterator-built matrix and on a regular DMatrix.

    Both matrices are built from the same ``IterForDMatrixDemo`` data, a
    ``gpu_hist`` model is trained on each, and their predictions on their
    own training matrix are asserted to agree.
    """
    rounds = 100
    it = IterForDMatrixDemo()

    # Use iterator, must be `DeviceQuantileDMatrix`
    m_with_it = xgboost.DeviceQuantileDMatrix(it)

    # Use regular DMatrix.
    m = xgboost.DMatrix(it.as_array(),
                        it.as_array_labels(),
                        weight=it.as_array_weights())

    assert m_with_it.num_col() == m.num_col()
    assert m_with_it.num_row() == m.num_row()

    reg_with_it = xgboost.train({'tree_method': 'gpu_hist'},
                                m_with_it,
                                num_boost_round=rounds)
    predict_with_it = reg_with_it.predict(m_with_it)

    reg = xgboost.train({'tree_method': 'gpu_hist'}, m, num_boost_round=rounds)
    predict = reg.predict(m)

    # A relative tolerance of 1e6 would accept virtually any pair of arrays
    # and make this check meaningless; 1e-6 actually compares the values.
    numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e-6)
Esempio n. 13
0
 def test_dmatrix_cupy_init(self):
     """A DeviceQuantileDMatrix can be constructed from cupy arrays."""
     import cupy as cp
     features = cp.random.randn(5, 5)
     labels = cp.ones(5, dtype=np.float64)
     xgb.DeviceQuantileDMatrix(features, labels)
Esempio n. 14
0
 def test_dmatrix_numpy_init(self):
     """Building a DeviceQuantileDMatrix from numpy arrays must fail."""
     features = np.random.randn(5, 5)
     labels = np.ones(5, dtype=np.float64)
     expected_message = 'is not supported for DeviceQuantileDMatrix'
     with pytest.raises(AssertionError, match=expected_message):
         xgb.DeviceQuantileDMatrix(features, labels)
Esempio n. 15
0
 def test_dlpack_device_dmat(self):
     """A DeviceQuantileDMatrix can be built from ``toDlpack()`` output."""
     import cupy as cp
     num_rows = 100
     features = cp.random.random((num_rows, 2))
     dlpack_data = features.toDlpack()
     xgb.DeviceQuantileDMatrix(dlpack_data)
Esempio n. 16
0
 def get_device_dmat(self):
     """Return a DeviceQuantileDMatrix built from cupy copies of the data.

     ``self.w`` is optional; ``self.margin`` is passed as ``base_margin``.
     """
     if self.w is None:
         weights = None
     else:
         weights = cp.array(self.w)
     features = cp.array(self.X, dtype=np.float32)
     labels = cp.array(self.y, dtype=np.float32)
     return xgb.DeviceQuantileDMatrix(
         features, labels, weights, base_margin=self.margin
     )