예제 #1
0
def test_regressor(loop):  # noqa
    """Compare distributed regressor predictions with plain lightgbm.

    A ``dlgbm.LGBMRegressor`` is fitted inside a temporary cluster on
    ``X`` and ``y`` (defined outside this function) wrapped with
    ``da.from_array(..., 5)``.  A ``lightgbm.LGBMRegressor`` is then fitted
    on the unwrapped data and both sets of predictions are compared with
    ``assert_eq``.
    """
    with cluster() as (scheduler, [worker_a, worker_b]):
        with Client(scheduler['address'], loop=loop):
            dask_model = dlgbm.LGBMRegressor(local_listen_port=16400)
            dask_X = da.from_array(X, 5)
            dask_y = da.from_array(y, 5)
            dask_model.fit(dask_X, dask_y)
            dask_pred = dask_model.predict(dask_X)

    # The reference model is trained and compared outside the cluster
    # context managers.
    local_model = lightgbm.LGBMRegressor()
    local_model.fit(X, y)
    local_pred = local_model.predict(X)
    assert_eq(dask_pred, local_pred)
예제 #2
0
def test_regressor_local_predict(client, listen_port):  # noqa
    """Check that a distributed regressor and its local copy agree.

    The regressor is fitted on the dask collections returned by
    ``_create_data`` and then converted with ``to_local()``.  Predictions of
    the distributed model on ``dX`` must equal predictions of the local copy
    on ``X``, and the R2 score of the distributed predictions must be close
    to the local model's ``score``.

    Parameters
    ----------
    client
        Client passed to ``fit``.
    listen_port
        Value forwarded as ``local_listen_port`` to the regressor.
    """
    # The third returned item (local sample weights) is not needed here.
    X, y, _, dX, dy, dw = _create_data('regression', output='array')

    dask_model = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
    dask_model = dask_model.fit(dX, dy, sample_weight=dw, client=client)
    local_model = dask_model.to_local()

    p1 = dask_model.predict(dX)
    p2 = local_model.predict(X)
    # Score the predictions before they are replaced by their computed form.
    s1 = r2_score(dy, p1)
    p1 = p1.compute()
    s2 = local_model.score(X, y)
    print(s1)

    # Predictions and scores should be the same
    assert_eq(p1, p2)
    # The bare np.isclose() call discarded its result; assert it so a score
    # mismatch actually fails the test.
    assert np.isclose(s1, s2)
예제 #3
0
def test_regressor_local_predict(loop, listen_port):
    """Check that a distributed regressor and its local copy agree.

    Runs inside a temporary cluster.  The regressor is fitted on the dask
    collections returned by ``_create_data`` and converted with
    ``to_local()``.  Predictions of the distributed model on ``dX`` must
    equal predictions of the local copy on ``X``, and the R2 score of the
    distributed predictions must be close to the local model's ``score``.

    Parameters
    ----------
    loop
        Event loop handed to ``Client``.
    listen_port
        Value forwarded as ``local_listen_port`` to the regressor.
    """
    # Worker handles get their own names so the model does not shadow them.
    with cluster() as (s, [worker_a, worker_b]):
        with Client(s['address'], loop=loop):
            # The third returned item (local sample weights) is not needed.
            X, y, _, dX, dy, dw = _create_data('regression', output="array")

            dask_model = dlgbm.LGBMRegressor(local_listen_port=listen_port, seed=42)
            dask_model = dask_model.fit(dX, dy, sample_weight=dw)
            local_model = dask_model.to_local()

            p1 = dask_model.predict(dX)
            p2 = local_model.predict(X)
            # Score the predictions before they are replaced by their
            # computed form.
            s1 = r2_score(dy, p1)
            p1 = p1.compute()
            s2 = local_model.score(X, y)
            print(s1)

            # Predictions and scores should be the same
            assert_eq(p1, p2)
            # The bare np.isclose() call discarded its result; assert it so
            # a score mismatch actually fails the test.
            assert np.isclose(s1, s2)
예제 #4
0
def test_regressor_quantile(loop, output, listen_port, alpha):
    """Check quantile regression coverage for distributed and local models.

    Both a ``dlgbm.LGBMRegressor`` and a ``lightgbm.LGBMRegressor`` are
    fitted with ``objective='quantile'``.  For each, the fraction of targets
    lying strictly below the prediction is computed and must be within 0.1
    of ``alpha``.

    Parameters
    ----------
    loop
        Event loop handed to ``Client``.
    output
        Forwarded to ``_create_data`` as its ``output`` argument.
    listen_port
        Value forwarded as ``local_listen_port`` to the distributed model.
    alpha
        Target quantile passed to both regressors.
    """
    # Worker handles get their own names so the models do not shadow them.
    with cluster() as (s, [worker_a, worker_b]):
        with Client(s['address'], loop=loop) as client:
            X, y, w, dX, dy, dw = _create_data('regression', output=output)

            dask_model = dlgbm.LGBMRegressor(
                local_listen_port=listen_port,
                seed=42,
                objective='quantile',
                alpha=alpha,
            )
            dask_model = dask_model.fit(dX, dy, client=client, sample_weight=dw)
            p1 = dask_model.predict(dX, client=client).compute()
            # Empirical share of targets below the predicted quantile.
            q1 = np.count_nonzero(y < p1) / y.shape[0]

            local_model = lightgbm.LGBMRegressor(seed=42, objective='quantile', alpha=alpha)
            local_model.fit(X, y, sample_weight=w)
            p2 = local_model.predict(X)
            q2 = np.count_nonzero(y < p2) / y.shape[0]

            # Quantiles should be right.  The bare np.isclose() calls
            # discarded their results, so nothing was verified; assert them.
            assert np.isclose(q1, alpha, atol=.1)
            assert np.isclose(q2, alpha, atol=.1)
예제 #5
0
def test_regress_newsread(client, listen_port):
    """Fit the distributed regressor on gzipped CSV data and check its R2.

    The CSV files matching ``./system_tests/data/*.gz`` are read with
    ``dd.read_csv``; column 0 is used as the target and the remaining
    columns as features.  The test passes when the R2 score of the
    predictions on the training data is above 0.8.
    """
    frame = dd.read_csv(
        "./system_tests/data/*.gz",
        compression="gzip",
        blocksize=None,
    )
    dX = frame.iloc[:, 1:]
    dy = frame.iloc[:, 0]

    model = dlgbm.LGBMRegressor(n_estimators=50, local_listen_port=listen_port)
    model.fit(dX, dy)

    dy_pred = model.predict(dX, client=client)

    # The dask_ml.metrics.r2_score method fails with dataframes so we compute
    # the R2 score ourselves: 1 - SS_res / SS_tot.
    ss_res = ((dy - dy_pred)**2).sum()
    ss_tot = ((dy - dy.mean())**2).sum()
    score = (1 - ss_res / ss_tot).compute()
    print(score)

    assert score > 0.8
예제 #6
0
def test_regressor(output, client, listen_port):  # noqa
    """Compare the distributed regressor with plain lightgbm.

    Both models are fitted on the data returned by ``_create_data`` for the
    requested ``output`` kind.  Unless ``output`` is ``'dataframe'``, the R2
    score of the distributed predictions must match the local model's
    ``score`` within 0.01.  Predictions of both models are compared with the
    targets using loose tolerances.
    """
    X, y, w, dX, dy, dw = _create_data('regression', output=output)
    # Score comparison is skipped for dataframe output
    # (presumably r2_score does not handle dataframes; confirm).
    compare_scores = output != 'dataframe'

    dask_model = dlgbm.LGBMRegressor(time_out=5, local_listen_port=listen_port, seed=42)
    dask_model = dask_model.fit(dX, dy, client=client, sample_weight=dw)
    dask_pred = dask_model.predict(dX, client=client)
    if compare_scores:
        dask_score = r2_score(dy, dask_pred)
    dask_pred = dask_pred.compute()

    local_model = lightgbm.LGBMRegressor(seed=42)
    local_model.fit(X, y, sample_weight=w)
    local_score = local_model.score(X, y)
    local_pred = local_model.predict(X)

    # Scores should be the same
    if compare_scores:
        assert_eq(dask_score, local_score, atol=.01)

    # Predictions should be roughly the same
    assert_eq(y, dask_pred, rtol=1., atol=50.)
    assert_eq(y, local_pred, rtol=1., atol=50.)