def test_regressor_pred_contrib(output, client, listen_port):
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    dask_regressor = dlgbm.DaskLGBMRegressor(
        time_out=5,
        local_listen_port=listen_port,
        tree_learner='data',
        n_estimators=10,
        num_leaves=10
    )
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
    preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute()

    local_regressor = lightgbm.LGBMRegressor(n_estimators=10, num_leaves=10)
    local_regressor.fit(X, y, sample_weight=w)
    local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True)

    if output == "scipy_csr_matrix":
        preds_with_contrib = np.array(preds_with_contrib.todense())

    # contrib outputs for distributed training are different from those for
    # local training, so we can only test that the output has the right shape
    # and that base values are in the right position
    num_features = dX.shape[1]
    assert preds_with_contrib.shape[1] == num_features + 1
    assert preds_with_contrib.shape == local_preds_with_contrib.shape

    client.close()

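# Aside (illustrative, not part of the original suite): a hedged sketch of the
# per-row contract behind pred_contrib output. For a local LGBMRegressor, each
# row's feature contributions plus the trailing base-value column sum to the
# raw score, which is what makes the `num_features + 1` shape check above
# meaningful. The helper name below is hypothetical.
def _check_contrib_rows_sum_to_raw_score(model, X):
    contribs = model.predict(X, pred_contrib=True)
    raw = model.predict(X, raw_score=True)
    np.testing.assert_allclose(contribs.sum(axis=1), raw, rtol=1e-5)
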
def test_regressor(output, client, listen_port):
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    dask_regressor = dlgbm.DaskLGBMRegressor(
        time_out=5,
        local_listen_port=listen_port,
        seed=42,
        num_leaves=10,
        tree_learner='data'
    )
    dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
    p1 = dask_regressor.predict(dX)
    if output != 'dataframe':
        s1 = r2_score(dy, p1)
    p1 = p1.compute()

    local_regressor = lightgbm.LGBMRegressor(seed=42, num_leaves=10)
    local_regressor.fit(X, y, sample_weight=w)
    s2 = local_regressor.score(X, y)
    p2 = local_regressor.predict(X)

    # Scores should be the same
    if output != 'dataframe':
        assert_eq(s1, s2, atol=.01)

    # Predictions should be roughly the same
    assert_eq(y, p1, rtol=1., atol=100.)
    assert_eq(y, p2, rtol=1., atol=50.)

    client.close()

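# Note (assumption): the r2_score used above must accept dask collections,
# since it is called on (dy, p1) before .compute(). A minimal stand-in with
# that property, shown only to pin down the assumed behavior (names here are
# illustrative, not the module's actual implementation):
def _r2_score_dask(dy_true, dy_pred):
    # compute() materializes the dask arrays; sklearn's r2_score does the rest
    from sklearn.metrics import r2_score as sk_r2_score
    return sk_r2_score(dy_true.compute(), dy_pred.compute())
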
def test_regressor_quantile(output, client, listen_port, alpha):
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    params = {
        "objective": "quantile",
        "alpha": alpha,
        "random_state": 42,
        "n_estimators": 10,
        "num_leaves": 10
    }
    dask_regressor = dlgbm.DaskLGBMRegressor(
        local_listen_port=listen_port,
        tree_learner_type='data_parallel',
        **params
    )
    dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
    p1 = dask_regressor.predict(dX).compute()
    q1 = np.count_nonzero(y < p1) / y.shape[0]

    local_regressor = lightgbm.LGBMRegressor(**params)
    local_regressor.fit(X, y, sample_weight=w)
    p2 = local_regressor.predict(X)
    q2 = np.count_nonzero(y < p2) / y.shape[0]

    # Quantiles should be right
    np.testing.assert_allclose(q1, alpha, atol=0.2)
    np.testing.assert_allclose(q2, alpha, atol=0.2)

    client.close()

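# Aside (illustrative, not part of the original suite): the coverage statistic
# the quantile test relies on. For a model trained with objective='quantile'
# at a given alpha, roughly an alpha fraction of the true targets should fall
# below the predictions, hence the assert_allclose(..., alpha, atol=0.2)
# checks above. The helper name is hypothetical.
def _empirical_coverage(y_true, y_pred):
    # fraction of observations strictly below their predicted alpha-quantile
    return np.count_nonzero(y_true < y_pred) / y_true.shape[0]
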
def test_regressor_local_predict(client, listen_port):
    X, y, _, dX, dy, dw = _create_data('regression', output='array')

    dask_regressor = dlgbm.DaskLGBMRegressor(
        local_listen_port=listen_port,
        random_state=42,
        n_estimators=10,
        num_leaves=10,
        tree_learner='data'
    )
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
    p1 = dask_regressor.predict(dX)
    p2 = dask_regressor.to_local().predict(X)
    s1 = r2_score(dy, p1)
    p1 = p1.compute()
    s2 = dask_regressor.to_local().score(X, y)

    # Predictions and scores should be the same
    assert_eq(p1, p2)
    assert_eq(s1, s2)

    client.close()

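# Usage note (sketch, assuming the fitted `dask_regressor` from the test
# above): to_local() is expected to hand back a plain, single-process
# lightgbm.LGBMRegressor that shares the distributed model's fitted booster,
# which is why its predict() and score() results are compared directly against
# the dask outputs:
#
#     local_model = dask_regressor.to_local()
#     assert isinstance(local_model, lightgbm.LGBMRegressor)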