def _get_dmatrix(data: RayDMatrix, param: Dict) -> xgb.DMatrix:
    """Build the xgboost matrix that corresponds to the kind of ``data``.

    Args:
        data: The Ray-side matrix. A ``RayDeviceQuantileDMatrix`` produces an
            ``xgb.DeviceQuantileDMatrix``; any other value produces a plain
            ``xgb.DMatrix``.
        param: Keyword arguments for the xgboost constructor. When
            ``param["data"]`` is a list, the entries are either fed through a
            ``RayDataIter`` (device-quantile case) or concatenated with
            ``concat_dataframes`` (plain case). NOTE: this dict is modified in
            place (``update`` / ``pop``).

    Returns:
        The constructed xgboost matrix.
    """
    if isinstance(data, RayDeviceQuantileDMatrix):
        if isinstance(param["data"], list):
            # Several shards: stream them through an iterator rather than
            # handing the list to xgboost directly.
            dm_param = {
                "feature_names": data.feature_names,
                "feature_types": data.feature_types,
                "missing": data.missing,
            }
            param.update(dm_param)
            it = RayDataIter(**param)
            matrix = xgb.DeviceQuantileDMatrix(it, **dm_param)
        else:
            matrix = xgb.DeviceQuantileDMatrix(**param)
    else:
        if isinstance(param["data"], list):
            # Several shards: merge each field into a single object.
            dm_param = {
                "data": concat_dataframes(param["data"]),
                "label": concat_dataframes(param["label"]),
                "weight": concat_dataframes(param["weight"]),
                "base_margin": concat_dataframes(param["base_margin"]),
                "label_lower_bound": concat_dataframes(
                    param["label_lower_bound"]
                ),
                "label_upper_bound": concat_dataframes(
                    param["label_upper_bound"]
                ),
            }
            param.update(dm_param)

        # The label bounds are not passed to the constructor; they are
        # removed from the kwargs and attached afterwards via ``set_info``.
        ll = param.pop("label_lower_bound", None)
        lu = param.pop("label_upper_bound", None)

        matrix = xgb.DMatrix(**param)
        matrix.set_info(label_lower_bound=ll, label_upper_bound=lu)
    return matrix
def test_dlpack_device_dmat(self):
    """Slicing a DeviceQuantileDMatrix built from a DLPack capsule raises."""
    import cupy as cp

    n_rows = 100
    features = cp.random.random((n_rows, 2))
    capsule = features.toDlpack()
    dmat = xgb.DeviceQuantileDMatrix(capsule)

    with pytest.raises(xgb.core.XGBoostError):
        dmat.slice(rindex=[0, 1, 2])
def run_invalid_category(self, tree_method: str) -> None:
    """Check that invalid categorical feature values are rejected.

    Three inputs are tried with every column declared categorical: a value
    just above the int32 maximum, the value 16777216, and real-valued data
    drawn from a normal distribution. For ``gpu_hist`` the last input is
    additionally fed to ``DeviceQuantileDMatrix``.
    """

    def expect_train_failure(features, labels) -> None:
        # The check is performed during sketching, so the matrix itself is
        # built outside the ``raises`` context and only training must fail.
        dmat = xgb.DMatrix(features, labels, feature_types=["c"] * 10)
        with pytest.raises(ValueError):
            xgb.train({"tree_method": tree_method}, dmat)

    rng = np.random.default_rng()

    # Category value that is too large.
    X = rng.integers(low=0, high=4, size=1000).reshape(100, 10)
    y = rng.normal(loc=0, scale=1, size=100)
    X[13, 7] = np.iinfo(np.int32).max + 1
    expect_train_failure(X, y)

    X[13, 7] = 16777216
    expect_train_failure(X, y)

    # Mixed positive and negative values.
    X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)
    y = rng.normal(loc=0, scale=1, size=100)
    expect_train_failure(X, y)

    if tree_method != "gpu_hist":
        return

    import cupy as cp

    X, y = cp.array(X), cp.array(y)
    with pytest.raises(ValueError):
        Xy = xgb.DeviceQuantileDMatrix(X, y, feature_types=["c"] * 10)
def test_cudf_categorical(self):
    """Exercise categorical cuDF input for DMatrix and DeviceQuantileDMatrix.

    Covers a generated categorical frame, a frame whose single categorical
    column contains a missing value, and that same column passed as a series.
    Building a matrix from the categorical data without
    ``enable_categorical=True`` is expected to raise ``ValueError``.
    """
    import cudf

    _X, _y = tm.make_categorical(100, 30, 17, False)
    X = cudf.from_pandas(_X)
    y = cudf.from_pandas(_y)

    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    assert len(Xy.feature_types) == X.shape[1]
    assert all(t == "c" for t in Xy.feature_types)

    Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
    assert len(Xy.feature_types) == X.shape[1]
    assert all(t == "c" for t in Xy.feature_types)

    # Missing value handling. ``np.nan`` is used because the ``np.NaN``
    # alias no longer exists in NumPy 2.0.
    X = cudf.DataFrame({"f0": ["a", "b", np.nan]})
    X["f0"] = X["f0"].astype("category")
    _, cat_codes, _, _ = xgb.data._transform_cudf_df(
        X, None, None, enable_categorical=True
    )
    for col in cat_codes:
        assert col.has_nulls

    y = [0, 1, 2]

    # Data frame input.
    with pytest.raises(ValueError):
        xgb.DMatrix(X, y)
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    assert Xy.num_row() == 3
    assert Xy.num_col() == 1

    with pytest.raises(ValueError):
        xgb.DeviceQuantileDMatrix(X, y)
    Xy = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
    assert Xy.num_row() == 3
    assert Xy.num_col() == 1

    # Single column (series) input.
    X = X["f0"]
    with pytest.raises(ValueError):
        xgb.DMatrix(X, y)
    Xy = xgb.DMatrix(X, y, enable_categorical=True)
    assert Xy.num_row() == 3
    assert Xy.num_col() == 1
def main():
    """Show two iterator-based ways of loading data, then train a booster."""
    # Better suited to GPU training.
    gpu_batches = IterLoadForDMatrix(train.loc[train_idx], FEATURES, "target")
    dtrain = xgb.DeviceQuantileDMatrix(gpu_batches, max_bin=256)

    # Better suited to CPU training.
    svm_files = ["file_0.svm", "file_1.svm", "file_2.svm"]
    cpu_batches = Iterator(svm_files)
    Xy = xgb.DMatrix(cpu_batches)

    # Other tree methods, including ``hist`` and ``gpu_hist``, also work,
    # but have some caveats as noted in the following sections.
    train_params = {"tree_method": "approx"}
    booster = xgb.train(train_params, Xy)
def test_large_input():
    """Train on a device matrix whose element count exceeds 2**31.

    The test is skipped when the first value reported by ``memGetInfo`` is
    below the 15 GB threshold.
    """
    required_bytes = 1.5e+10  # 15 GB
    reported_bytes, _ = cp.cuda.runtime.memGetInfo()
    if reported_bytes < required_bytes:
        pytest.skip("Not enough memory on this device")

    n_cols = 1000
    # Ceiling division: the smallest row count with n_rows * n_cols >= 2**31.
    n_rows = ((1 << 31) + n_cols - 1) // n_cols
    total_elements = n_rows * n_cols
    assert np.log2(total_elements) > 31

    features = cp.ones((n_rows, n_cols), dtype=np.float32)
    labels = cp.ones(n_rows)
    dmat = xgb.DeviceQuantileDMatrix(features, labels)

    train_params = {"tree_method": "gpu_hist", "max_depth": 1}
    xgb.train(train_params, dmat, 1)
def test_categorical(self):
    """Both matrix types report every cuDF column as ``"categorical"``."""
    import cudf

    pandas_X, pandas_y = tm.make_categorical(100, 30, 17, False)
    X = cudf.from_pandas(pandas_X)
    y = cudf.from_pandas(pandas_y)
    n_features = X.shape[1]

    plain = xgb.DMatrix(X, y, enable_categorical=True)
    assert len(plain.feature_types) == n_features
    assert all(ftype == "categorical" for ftype in plain.feature_types)

    quantile = xgb.DeviceQuantileDMatrix(X, y, enable_categorical=True)
    assert len(quantile.feature_types) == n_features
    assert all(ftype == "categorical" for ftype in quantile.feature_types)
def test_invalid_categorical(self):
    """Real-valued data declared as categorical must be rejected."""
    import cupy as cp

    rng = np.random.default_rng()
    host_X = rng.normal(loc=0, scale=1, size=1000).reshape(100, 10)
    host_y = rng.normal(loc=0, scale=1, size=100)

    # The check is performed during sketching, so building the DMatrix
    # succeeds and the error only surfaces once training starts.
    host_dmat = xgb.DMatrix(host_X, host_y, feature_types=["c"] * 10)
    with pytest.raises(ValueError):
        xgb.train({"tree_method": "gpu_hist"}, host_dmat)

    # With device input, constructing the quantile matrix itself fails.
    device_X = cp.array(host_X)
    device_y = cp.array(host_y)
    with pytest.raises(ValueError):
        xgb.DeviceQuantileDMatrix(device_X, device_y, feature_types=["c"] * 10)
def test_metainfo(self) -> None:
    """Labels and feature weights can be read back from the matrix."""
    import cupy as cp

    n_rows, n_cols = 10, 3
    rng = cp.random.RandomState(1994)

    features = rng.randn(n_rows, n_cols)
    targets = rng.randn(n_rows)

    # Shift the weights so that the smallest one is zero (none negative).
    # NOTE(review): the weight vector has one entry per row rather than per
    # column -- confirm this is intended for ``feature_weights``.
    weights = rng.randn(n_rows)
    weights -= weights.min()

    dmat = xgb.DeviceQuantileDMatrix(
        data=features, label=targets, feature_weights=weights
    )
    stored_weights = dmat.get_float_info("feature_weights")
    stored_targets = dmat.get_label()

    cp.testing.assert_allclose(weights, stored_weights)
    cp.testing.assert_allclose(targets, stored_targets)
def test_from_cudf_iter():
    """An iterator-fed matrix trains to the same predictions as a DMatrix."""
    n_rounds = 100
    params = {'tree_method': 'gpu_hist'}
    batches = IterForDMatrixTest()

    # Path 1: feed the iterator to the quantile matrix.
    iter_dmat = xgb.DeviceQuantileDMatrix(batches)
    iter_booster = xgb.train(dict(params), iter_dmat, num_boost_round=n_rounds)
    iter_predictions = iter_booster.predict(iter_dmat)

    # Path 2: materialise the full arrays and build a regular DMatrix.
    full_dmat = xgb.DMatrix(batches.as_array(), batches.as_array_labels())

    assert iter_dmat.num_col() == full_dmat.num_col()
    assert iter_dmat.num_row() == full_dmat.num_row()

    full_booster = xgb.train(dict(params), full_dmat, num_boost_round=n_rounds)
    full_predictions = full_booster.predict(full_dmat)

    np.testing.assert_allclose(iter_predictions, full_predictions)
def test_from_cudf_iter(enable_categorical):
    """Iterator input and array input give matching shapes and predictions.

    ``enable_categorical`` is forwarded to the batch iterator and to both
    matrix constructors.
    """
    n_rounds = 100
    params = {"tree_method": "gpu_hist"}
    batches = IterForDMatrixTest(enable_categorical)

    # Model trained from the iterator-backed quantile matrix.
    iter_dmat = xgb.DeviceQuantileDMatrix(
        batches, enable_categorical=enable_categorical
    )
    iter_booster = xgb.train(params, iter_dmat, num_boost_round=n_rounds)

    # Model trained from the fully materialised data.
    full_X = batches.as_array()
    full_y = batches.as_array_labels()
    full_dmat = xgb.DMatrix(
        full_X, full_y, enable_categorical=enable_categorical
    )

    assert iter_dmat.num_col() == full_dmat.num_col()
    assert iter_dmat.num_row() == full_dmat.num_row()

    full_booster = xgb.train(params, full_dmat, num_boost_round=n_rounds)

    expected = full_booster.predict(full_dmat)
    actual = iter_booster.predict(iter_dmat)
    np.testing.assert_allclose(actual, expected)
def main():
    """Train from a data iterator and from a regular DMatrix, then compare.

    The same demo data is loaded twice: once through the iterator into a
    ``DeviceQuantileDMatrix`` and once as full arrays into a ``DMatrix``.
    Both models are trained with ``gpu_hist`` and their predictions are
    checked against each other.
    """
    rounds = 100
    it = IterForDMatrixDemo()

    # Use iterator, must be `DeviceQuantileDMatrix`.
    m_with_it = xgboost.DeviceQuantileDMatrix(it)

    # Use regular DMatrix.
    m = xgboost.DMatrix(
        it.as_array(), it.as_array_labels(), weight=it.as_array_weights()
    )

    assert m_with_it.num_col() == m.num_col()
    assert m_with_it.num_row() == m.num_row()

    reg_with_it = xgboost.train(
        {'tree_method': 'gpu_hist'}, m_with_it, num_boost_round=rounds
    )
    predict_with_it = reg_with_it.predict(m_with_it)

    reg = xgboost.train({'tree_method': 'gpu_hist'}, m, num_boost_round=rounds)
    predict = reg.predict(m)

    # A relative tolerance of 1e6 would accept practically any pair of
    # finite predictions; 1e-6 makes this comparison meaningful.
    numpy.testing.assert_allclose(predict_with_it, predict, rtol=1e-6)
def test_dmatrix_cupy_init(self):
    """A DeviceQuantileDMatrix can be constructed from CuPy arrays."""
    import cupy as cp

    features = cp.random.randn(5, 5)
    labels = cp.ones(5, dtype=np.float64)
    dmat = xgb.DeviceQuantileDMatrix(features, labels)
def test_dmatrix_numpy_init(self):
    """Constructing a DeviceQuantileDMatrix from NumPy arrays must fail."""
    features = np.random.randn(5, 5)
    expected_message = 'is not supported for DeviceQuantileDMatrix'

    with pytest.raises(AssertionError, match=expected_message):
        xgb.DeviceQuantileDMatrix(features, np.ones(5, dtype=np.float64))
def test_dlpack_device_dmat(self):
    """A DeviceQuantileDMatrix can be constructed from a DLPack capsule."""
    import cupy as cp

    n_rows = 100
    features = cp.random.random((n_rows, 2))
    capsule = features.toDlpack()
    xgb.DeviceQuantileDMatrix(capsule)
def get_device_dmat(self):
    """Return a DeviceQuantileDMatrix built from this object's data.

    ``self.X`` and ``self.y`` are converted to float32 CuPy arrays,
    ``self.w`` is converted only when it is not ``None``, and
    ``self.margin`` is passed through as ``base_margin``.
    """
    if self.w is None:
        weights = None
    else:
        weights = cp.array(self.w)

    features = cp.array(self.X, dtype=np.float32)
    labels = cp.array(self.y, dtype=np.float32)

    return xgb.DeviceQuantileDMatrix(
        features, labels, weights, base_margin=self.margin
    )