예제 #1
0
def test_multiclass_classifier(loop):  # noqa
    """Compare the Dask classifier with plain LightGBM on the iris data.

    Two pairs of models are fitted -- one pair on arrays, one pair on
    DataFrames -- and within each pair the class predictions and the
    predicted probabilities must match.
    """
    # In-memory reference data.
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    frame = pd.DataFrame(features, columns=dataset.feature_names)
    target_series = pd.Series(target, name='target')

    # Dask counterparts of the data above.
    dask_features = da.from_array(features, 5)
    dask_target = da.from_array(target, 5)
    dask_frame = dd.from_pandas(frame, 2)
    dask_target_series = dd.from_pandas(target_series, 2)

    # One local/distributed model pair per input flavour.
    array_local = lightgbm.LGBMClassifier()
    array_dask = dlgbm.LGBMClassifier(local_listen_port=13400)
    frame_local = lightgbm.LGBMClassifier()
    frame_dask = dlgbm.LGBMClassifier(local_listen_port=14400)

    with cluster() as (scheduler, [_, _]), \
            Client(scheduler['address'], loop=loop):
        # Train the array pair, then the frame pair.
        array_local.fit(features, target)
        array_dask.fit(dask_features, dask_target)
        frame_local.fit(frame, target_series)
        frame_dask.fit(dask_frame, dask_target_series)

        # Array-based models must agree.
        da.utils.assert_eq(array_local.predict(features),
                           array_dask.predict(dask_features))
        da.utils.assert_eq(array_local.predict_proba(features),
                           array_dask.predict_proba(dask_features))

        # Frame-based models must agree.
        da.utils.assert_eq(frame_local.predict(frame),
                           frame_dask.predict(dask_frame))
        da.utils.assert_eq(frame_local.predict_proba(frame),
                           frame_dask.predict_proba(dask_frame))
예제 #2
0
def test_classifier_multi(kind, loop):
    """Check multiclass ``predict``/``predict_proba`` on Dask collections.

    When ``kind`` is ``'array'`` the inputs are Dask arrays; for any other
    value they are Dask DataFrame collections built from the ``df`` and
    ``labels`` objects defined outside this function.
    """
    use_arrays = kind == 'array'
    if use_arrays:
        features = da.from_array(X, 5)
        target = da.from_array(
            np.array([0, 1, 2, 0, 1, 2, 0, 0, 0, 1]),
            chunks=5,
        )
    else:
        features = dd.from_pandas(df, npartitions=2)
        target = dd.from_pandas(labels, npartitions=2)

    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop):
        model = dlgbm.LGBMClassifier(n_estimators=10,
                                     objective="multiclass",
                                     local_listen_port=15400)
        model.fit(features, target)

        # Class predictions: lazy collection with one entry per sample.
        predicted = model.predict(features)
        assert dask.is_dask_collection(predicted)
        if use_arrays:
            assert predicted.shape == (10, )
        computed = predicted.compute()
        assert computed.shape == (10, )

        # Probabilities: lazy collection with one column per class.
        probabilities = model.predict_proba(features)
        assert dask.is_dask_collection(probabilities)
        if use_arrays:
            assert probabilities.shape == (10, 3)
        assert probabilities.compute().shape == (10, 3)
예제 #3
0
def test_classifier(loop, output, listen_port, centers):
    """Compare distributed and local classifiers on generated data.

    ``_create_data`` yields six objects; the first three are fed to plain
    LightGBM and the last three to the Dask estimator.  Scores and
    predictions of both models must match each other and the labels.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop) as client:
        (local_X, local_y, local_w,
         dask_X, dask_y, dask_w) = _create_data('classification',
                                                output=output,
                                                centers=centers)

        # Distributed model: score is taken before materialising.
        dask_model = dlgbm.LGBMClassifier(local_listen_port=listen_port)
        dask_model = dask_model.fit(dask_X, dask_y, sample_weight=dask_w)
        dask_pred = dask_model.predict(dask_X, client=client)
        dask_score = accuracy_score(dask_y, dask_pred)
        dask_pred = dask_pred.compute()

        # Local reference model.
        local_model = lightgbm.LGBMClassifier()
        local_model.fit(local_X, local_y, sample_weight=local_w)
        local_pred = local_model.predict(local_X)
        local_score = local_model.score(local_X, local_y)

        print(confusion_matrix(local_y, dask_pred))
        print(confusion_matrix(local_y, local_pred))

        assert_eq(dask_score, local_score)
        print(dask_score)

        assert_eq(dask_pred, local_pred)
        assert_eq(local_y, dask_pred)
        assert_eq(local_y, local_pred)
예제 #4
0
def test_classifier_local_predict(client, listen_port):  # noqa
    """Check predictions of a Dask-trained model converted via ``to_local``.

    The converted model's predictions must equal those of a plain LightGBM
    classifier, and both must equal the labels.
    """
    (local_X, local_y, local_w,
     dask_X, dask_y, dask_w) = _create_data('classification', output='array')

    # Train through Dask, then convert before predicting.
    dask_model = dlgbm.LGBMClassifier(time_out=5,
                                      local_listen_port=listen_port)
    dask_model = dask_model.fit(dask_X, dask_y,
                                sample_weight=dask_w,
                                client=client)
    converted_pred = dask_model.to_local().predict(dask_X)

    # Plain LightGBM reference.
    reference = lightgbm.LGBMClassifier()
    reference.fit(local_X, local_y, sample_weight=local_w)
    reference_pred = reference.predict(local_X)

    assert_eq(converted_pred, reference_pred)
    assert_eq(local_y, converted_pred)
    assert_eq(local_y, reference_pred)
예제 #5
0
def test_classifier_proba(loop, output, listen_port, centers):
    """Compare predicted probabilities of the Dask and local classifiers.

    The two probability arrays are compared with an absolute tolerance
    of 0.3.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop) as client:
        (local_X, local_y, local_w,
         dask_X, dask_y, dask_w) = _create_data(output=output,
                                                centers=centers)

        # Distributed probabilities, materialised for comparison.
        dask_model = dlgbm.LGBMClassifier(local_listen_port=listen_port)
        dask_model = dask_model.fit(dask_X, dask_y, sample_weight=dask_w)
        dask_proba = dask_model.predict_proba(dask_X, client=client)
        dask_proba = dask_proba.compute()

        # Local reference probabilities.
        local_model = lightgbm.LGBMClassifier()
        local_model.fit(local_X, local_y, sample_weight=local_w)
        local_proba = local_model.predict_proba(local_X)

        assert_eq(dask_proba, local_proba, atol=0.3)
예제 #6
0
def test_classifier_proba(output, centers, client, listen_port):  # noqa
    """Compare predicted probabilities of the Dask and local classifiers.

    Uses the supplied ``client`` for both fitting and prediction; the two
    probability arrays are compared with an absolute tolerance of 0.3.
    """
    (local_X, local_y, local_w,
     dask_X, dask_y, dask_w) = _create_data('classification',
                                            output=output,
                                            centers=centers)

    # Distributed probabilities, materialised for comparison.
    dask_model = dlgbm.LGBMClassifier(time_out=5,
                                      local_listen_port=listen_port)
    dask_model = dask_model.fit(dask_X, dask_y,
                                sample_weight=dask_w,
                                client=client)
    dask_proba = dask_model.predict_proba(dask_X, client=client).compute()

    # Local reference probabilities.
    local_model = lightgbm.LGBMClassifier()
    local_model.fit(local_X, local_y, sample_weight=local_w)
    local_proba = local_model.predict_proba(local_X)

    assert_eq(dask_proba, local_proba, atol=0.3)
예제 #7
0
    def test_classify_newsread(self):
        """Fit the Dask classifier on the gzipped CSV data and check accuracy.

        The last column of the loaded frame is used as the label and every
        other column as a feature.  The model is evaluated on the data it
        was trained on and must reach an accuracy of at least 0.9.
        """
        data = dd.read_csv("./system_tests/data/*.gz",
                           compression="gzip",
                           blocksize=None)
        dX = data.iloc[:, :-1]
        dy = data.iloc[:, -1]

        d_classif = dlgbm.LGBMClassifier(n_estimators=50)
        d_classif.fit(dX, dy)

        dy_pred = d_classif.predict(dX)

        print(confusion_matrix(dy.compute(), dy_pred.compute()))

        # The ratio below is a lazy Dask scalar.  It has to be computed
        # before it is compared: handing the lazy object to the assertion
        # either fails on boolean conversion or passes without checking
        # anything, depending on the Dask version.
        acc_score = ((dy == dy_pred).sum() / len(dy)).compute()
        self.assertGreaterEqual(acc_score, 0.9)
예제 #8
0
def test_classifier_local_predict(loop):  # noqa
    """Check predictions of a Dask-trained model converted via ``to_local``.

    Runs inside a freshly started test cluster.  The converted model's
    predictions must equal those of a plain LightGBM classifier, and both
    must equal the labels.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop):
        (local_X, local_y, local_w,
         dask_X, dask_y, dask_w) = _create_data(output="array")

        # Train through Dask, then convert before predicting.
        dask_model = dlgbm.LGBMClassifier(local_listen_port=11400)
        dask_model = dask_model.fit(dask_X, dask_y, sample_weight=dask_w)
        converted_pred = dask_model.to_local().predict(dask_X)

        # Plain LightGBM reference.
        reference = lightgbm.LGBMClassifier()
        reference.fit(local_X, local_y, sample_weight=local_w)
        reference_pred = reference.predict(local_X)

        assert_eq(converted_pred, reference_pred)
        assert_eq(local_y, converted_pred)
        assert_eq(local_y, reference_pred)
예제 #9
0
def test_classify_newsread(client, listen_port):
    """Fit the Dask classifier on the gzipped CSV data and check accuracy.

    The last column of the loaded frame is used as the label and every
    other column as a feature.  Accuracy on the training data must exceed
    0.8.
    """
    frame = dd.read_csv("./system_tests/data/*.gz",
                        compression="gzip",
                        blocksize=None)
    features = frame.iloc[:, :-1]
    target = frame.iloc[:, -1]

    model = dlgbm.LGBMClassifier(n_estimators=50,
                                 local_listen_port=listen_port)
    model.fit(features, target)

    predicted = model.predict(features, client=client)

    # Build the lazy hit ratio, then materialise it before asserting.
    n_correct = (target == predicted).sum()
    acc_score = (n_correct / len(target)).compute()
    print(acc_score)

    assert acc_score > 0.8
예제 #10
0
def test_classifier(loop):
    """Compare the Dask classifier with plain LightGBM on shared settings.

    Both models receive the same hyper-parameters; their feature
    importances and predictions must agree.  The comparisons run after
    the test cluster has been shut down.
    """
    # Hyper-parameters common to the distributed and the local model.
    shared_params = dict(min_data=1,
                         min_data_in_bin=1,
                         min_child_samples=1,
                         random_state=1)

    with cluster() as (scheduler, [_worker_a, _worker_b]), \
            Client(scheduler['address'], loop=loop):
        dask_model = dlgbm.LGBMClassifier(**shared_params,
                                          local_listen_port=12400)
        dask_X = da.from_array(X, 2)
        dask_y = da.from_array(y, 2)
        dask_model = dask_model.fit(dask_X, dask_y)
        dask_pred = dask_model.predict(dask_X)

    local_model = lightgbm.LGBMClassifier(**shared_params)
    local_model.fit(X, y)

    np.testing.assert_array_almost_equal(dask_model.feature_importances_,
                                         local_model.feature_importances_)
    assert_eq(dask_pred, local_model.predict(X))
예제 #11
0
def test_classifier(output, centers, client, listen_port):  # noqa
    """Compare distributed and local classifiers on generated data.

    ``_create_data`` yields six objects; the first three are fed to plain
    LightGBM and the last three to the Dask estimator.  Scores and
    predictions of both models must match each other and the labels.
    """
    (local_X, local_y, local_w,
     dask_X, dask_y, dask_w) = _create_data('classification',
                                            output=output,
                                            centers=centers)

    # Distributed model: score is taken before materialising.
    dask_model = dlgbm.LGBMClassifier(time_out=5,
                                      local_listen_port=listen_port)
    dask_model = dask_model.fit(dask_X, dask_y,
                                sample_weight=dask_w,
                                client=client)
    dask_pred = dask_model.predict(dask_X, client=client)
    dask_score = accuracy_score(dask_y, dask_pred)
    dask_pred = dask_pred.compute()

    # Local reference model.
    local_model = lightgbm.LGBMClassifier()
    local_model.fit(local_X, local_y, sample_weight=local_w)
    local_pred = local_model.predict(local_X)
    local_score = local_model.score(local_X, local_y)

    print(confusion_matrix(local_y, dask_pred))
    print(confusion_matrix(local_y, local_pred))

    assert_eq(dask_score, local_score)
    print(dask_score)

    assert_eq(dask_pred, local_pred)
    assert_eq(local_y, dask_pred)
    assert_eq(local_y, local_pred)