def test_multiclass_classifier(loop):  # noqa
    """Compare `dlgbm.LGBMClassifier` with `lightgbm.LGBMClassifier` on iris.

    The comparison is made twice: once with a numpy array / dask array pair
    and once with a pandas / dask dataframe pair. Predicted labels and
    predicted probabilities must match in both cases.
    """
    # Build the same dataset in four container flavours.
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    dask_features = da.from_array(features, 5)
    dask_target = da.from_array(target, 5)
    frame = pd.DataFrame(features, columns=dataset.feature_names)
    series = pd.Series(target, name='target')
    dask_frame = dd.from_pandas(frame, 2)
    dask_series = dd.from_pandas(series, 2)

    # One reference/dlgbm pair per container type; each dlgbm model gets
    # its own listen port.
    array_reference = lightgbm.LGBMClassifier()
    array_distributed = dlgbm.LGBMClassifier(local_listen_port=13400)
    frame_reference = lightgbm.LGBMClassifier()
    frame_distributed = dlgbm.LGBMClassifier(local_listen_port=14400)

    with cluster() as (s, [_, _]), Client(s['address'], loop=loop):
        # Train all four models.
        array_reference.fit(features, target)
        array_distributed.fit(dask_features, dask_target)
        frame_reference.fit(frame, series)
        frame_distributed.fit(dask_frame, dask_series)

        # Array-backed models must agree.
        da.utils.assert_eq(
            array_reference.predict(features),
            array_distributed.predict(dask_features),
        )
        da.utils.assert_eq(
            array_reference.predict_proba(features),
            array_distributed.predict_proba(dask_features),
        )

        # Dataframe-backed models must agree.
        da.utils.assert_eq(
            frame_reference.predict(frame),
            frame_distributed.predict(dask_frame),
        )
        da.utils.assert_eq(
            frame_reference.predict_proba(frame),
            frame_distributed.predict_proba(dask_frame),
        )
def test_classifier_multi(kind, loop):
    """Check output types and shapes of a 3-class `dlgbm.LGBMClassifier`.

    With ``kind == 'array'`` the inputs are dask arrays built from the
    module-level ``X`` and a fixed 10-element label vector; otherwise they
    are dask dataframes built from the module-level ``df`` and ``labels``.
    """
    use_arrays = kind == 'array'
    if use_arrays:
        features = da.from_array(X, 5)
        target = da.from_array(
            np.array([0, 1, 2, 0, 1, 2, 0, 0, 0, 1]),
            chunks=5,
        )
    else:
        features = dd.from_pandas(df, npartitions=2)
        target = dd.from_pandas(labels, npartitions=2)

    with cluster() as (s, [_, _]), Client(s['address'], loop=loop):
        model = dlgbm.LGBMClassifier(
            n_estimators=10,
            objective="multiclass",
            local_listen_port=15400,
        )
        model.fit(features, target)

        # Class predictions: lazy collection, one label per row.
        predicted = model.predict(features)
        assert dask.is_dask_collection(predicted)
        if use_arrays:
            assert predicted.shape == (10, )
        computed = predicted.compute()
        assert computed.shape == (10, )

        # Probabilities: lazy collection, one column per class.
        probabilities = model.predict_proba(features)
        assert dask.is_dask_collection(probabilities)
        if use_arrays:
            assert probabilities.shape == (10, 3)
        assert probabilities.compute().shape == (10, 3)
def test_classifier(loop, output, listen_port, centers):
    """Compare `dlgbm.LGBMClassifier` with `lightgbm.LGBMClassifier`.

    Both models are trained with sample weights on data produced by
    `_create_data`. Their accuracy scores and predictions must match, and
    both sets of predictions must equal the true labels.
    """
    with cluster() as (s, [_, _]), Client(s['address'], loop=loop) as client:
        X, y, w, dX, dy, dw = _create_data('classification', output=output, centers=centers)

        # dlgbm model on the dask collections.
        distributed = dlgbm.LGBMClassifier(local_listen_port=listen_port)
        distributed = distributed.fit(dX, dy, sample_weight=dw)
        lazy_pred = distributed.predict(dX, client=client)
        distributed_score = accuracy_score(dy, lazy_pred)
        distributed_pred = lazy_pred.compute()

        # Reference model on the in-memory data.
        reference = lightgbm.LGBMClassifier()
        reference.fit(X, y, sample_weight=w)
        reference_pred = reference.predict(X)
        reference_score = reference.score(X, y)

        print(confusion_matrix(y, distributed_pred))
        print(confusion_matrix(y, reference_pred))

        assert_eq(distributed_score, reference_score)
        print(distributed_score)

        assert_eq(distributed_pred, reference_pred)
        assert_eq(y, distributed_pred)
        assert_eq(y, reference_pred)
def test_classifier_local_predict(client, listen_port):  # noqa
    """Check predictions of a fitted dlgbm classifier after `to_local()`.

    The converted model's predictions must equal those of a
    `lightgbm.LGBMClassifier` trained on the same data, and both must equal
    the true labels.
    """
    X, y, w, dX, dy, dw = _create_data('classification', output='array')

    distributed = dlgbm.LGBMClassifier(time_out=5, local_listen_port=listen_port)
    distributed = distributed.fit(dX, dy, sample_weight=dw, client=client)
    local_model = distributed.to_local()
    converted_pred = local_model.predict(dX)

    reference = lightgbm.LGBMClassifier()
    reference.fit(X, y, sample_weight=w)
    reference_pred = reference.predict(X)

    assert_eq(converted_pred, reference_pred)
    assert_eq(y, converted_pred)
    assert_eq(y, reference_pred)
def test_classifier_proba(loop, output, listen_port, centers):
    """Compare class probabilities from dlgbm and lightgbm classifiers.

    Both models are trained with sample weights; the probabilities must
    agree within an absolute tolerance of 0.3.
    """
    with cluster() as (s, [_, _]), Client(s['address'], loop=loop) as client:
        X, y, w, dX, dy, dw = _create_data(output=output, centers=centers)

        distributed = dlgbm.LGBMClassifier(local_listen_port=listen_port)
        distributed = distributed.fit(dX, dy, sample_weight=dw)
        distributed_proba = distributed.predict_proba(dX, client=client).compute()

        reference = lightgbm.LGBMClassifier()
        reference.fit(X, y, sample_weight=w)
        reference_proba = reference.predict_proba(X)

        assert_eq(distributed_proba, reference_proba, atol=0.3)
def test_classifier_proba(output, centers, client, listen_port):  # noqa
    """Compare class probabilities from dlgbm and lightgbm classifiers.

    Uses the `client` fixture for both fitting and prediction. The
    probabilities must agree within an absolute tolerance of 0.3.
    """
    X, y, w, dX, dy, dw = _create_data('classification', output=output, centers=centers)

    distributed = dlgbm.LGBMClassifier(time_out=5, local_listen_port=listen_port)
    distributed = distributed.fit(dX, dy, sample_weight=dw, client=client)
    lazy_proba = distributed.predict_proba(dX, client=client)
    distributed_proba = lazy_proba.compute()

    reference = lightgbm.LGBMClassifier()
    reference.fit(X, y, sample_weight=w)
    reference_proba = reference.predict_proba(X)

    assert_eq(distributed_proba, reference_proba, atol=0.3)
def test_classify_newsread(self):
    """Fit `dlgbm.LGBMClassifier` on the gzipped CSV system-test data.

    The last column is used as the label and all other columns as
    features. The test passes when accuracy on the training data is at
    least 0.9.
    """
    data = dd.read_csv("./system_tests/data/*.gz", compression="gzip", blocksize=None)
    dX = data.iloc[:, :-1]
    dy = data.iloc[:, -1]

    d_classif = dlgbm.LGBMClassifier(n_estimators=50)
    d_classif.fit(dX, dy)
    dy_pred = d_classif.predict(dX)

    print(confusion_matrix(dy.compute(), dy_pred.compute()))

    # The ratio is a lazy dask expression. Compute it first so the
    # assertion compares a concrete number rather than an unevaluated
    # object whose truthiness says nothing about the accuracy.
    accuracy = ((dy == dy_pred).sum() / len(dy)).compute()
    self.assertGreaterEqual(accuracy, 0.9)
def test_classifier_local_predict(loop):  # noqa
    """Check predictions of a fitted dlgbm classifier after `to_local()`.

    Runs inside a test cluster. The converted model's predictions must
    equal those of a `lightgbm.LGBMClassifier` trained on the same data,
    and both must equal the true labels.
    """
    with cluster() as (s, [_, _]), Client(s['address'], loop=loop):
        X, y, w, dX, dy, dw = _create_data(output="array")

        distributed = dlgbm.LGBMClassifier(local_listen_port=11400)
        distributed = distributed.fit(dX, dy, sample_weight=dw)
        local_model = distributed.to_local()
        converted_pred = local_model.predict(dX)

        reference = lightgbm.LGBMClassifier()
        reference.fit(X, y, sample_weight=w)
        reference_pred = reference.predict(X)

        assert_eq(converted_pred, reference_pred)
        assert_eq(y, converted_pred)
        assert_eq(y, reference_pred)
def test_classify_newsread(client, listen_port):
    """Fit `dlgbm.LGBMClassifier` on the gzipped CSV system-test data.

    The last column is used as the label and all other columns as
    features. The test passes when accuracy on the training data is above
    0.8.
    """
    data = dd.read_csv("./system_tests/data/*.gz", compression="gzip", blocksize=None)
    features = data.iloc[:, :-1]
    target = data.iloc[:, -1]

    model = dlgbm.LGBMClassifier(n_estimators=50, local_listen_port=listen_port)
    model.fit(features, target)
    predicted = model.predict(features, client=client)

    # Build the accuracy lazily, then materialise it before asserting.
    n_correct = (target == predicted).sum()
    accuracy = (n_correct / len(target)).compute()

    print(accuracy)
    assert accuracy > 0.8
def test_classifier(loop):
    """Compare dlgbm and lightgbm classifiers on the module-level `X`, `y`.

    Both models use the same minimum-data settings and random state. Their
    feature importances must be almost equal and their predictions must
    match.
    """
    with cluster() as (s, [_, _]), Client(s['address'], loop=loop):
        distributed = dlgbm.LGBMClassifier(
            min_data=1,
            min_data_in_bin=1,
            min_child_samples=1,
            random_state=1,
            local_listen_port=12400,
        )
        dask_features = da.from_array(X, 2)
        dask_target = da.from_array(y, 2)
        distributed = distributed.fit(dask_features, dask_target)
        distributed_pred = distributed.predict(dask_features)

        reference = lightgbm.LGBMClassifier(
            min_data=1,
            min_data_in_bin=1,
            min_child_samples=1,
            random_state=1,
        )
        reference.fit(X, y)

        np.testing.assert_array_almost_equal(
            distributed.feature_importances_,
            reference.feature_importances_,
        )
        assert_eq(distributed_pred, reference.predict(X))
def test_classifier(output, centers, client, listen_port):  # noqa
    """Compare `dlgbm.LGBMClassifier` with `lightgbm.LGBMClassifier`.

    Uses the `client` fixture for fitting and prediction. Accuracy scores
    and predictions must match, and both sets of predictions must equal
    the true labels.
    """
    X, y, w, dX, dy, dw = _create_data('classification', output=output, centers=centers)

    # dlgbm model on the dask collections.
    distributed = dlgbm.LGBMClassifier(time_out=5, local_listen_port=listen_port)
    distributed = distributed.fit(dX, dy, sample_weight=dw, client=client)
    lazy_pred = distributed.predict(dX, client=client)
    distributed_score = accuracy_score(dy, lazy_pred)
    distributed_pred = lazy_pred.compute()

    # Reference model on the in-memory data.
    reference = lightgbm.LGBMClassifier()
    reference.fit(X, y, sample_weight=w)
    reference_pred = reference.predict(X)
    reference_score = reference.score(X, y)

    print(confusion_matrix(y, distributed_pred))
    print(confusion_matrix(y, reference_pred))

    assert_eq(distributed_score, reference_score)
    print(distributed_score)

    assert_eq(distributed_pred, reference_pred)
    assert_eq(y, distributed_pred)
    assert_eq(y, reference_pred)