Ejemplo n.º 1
0
def test_mbsgd_regressor_default(datatype, nrows,
                                 column_info):
    """Compare default ``cumlMBSGRegressor`` and ``SGDRegressor`` R^2 scores.

    A synthetic regression problem is generated from ``nrows`` and
    ``column_info`` (feature count, informative-feature count), cast to
    ``datatype`` and split 80/20. Both estimators are fitted with their
    default arguments. If their R^2 scores differ by more than 0.02 the
    test is reported as an expected failure instead of a hard failure.
    """
    n_features, n_informative = column_info
    features, target = make_regression(n_samples=nrows,
                                       n_features=n_features,
                                       n_informative=n_informative,
                                       random_state=0)
    features = features.astype(datatype)
    target = target.astype(datatype)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, train_size=0.8, random_state=0)

    gpu_model = cumlMBSGRegressor()
    gpu_model.fit(X_train, y_train)
    gpu_pred = gpu_model.predict(X_test).to_array()

    cpu_model = SGDRegressor()
    cpu_model.fit(X_train, y_train)
    cpu_pred = cpu_model.predict(X_test)

    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    gpu_r2 = r2_score(gpu_pred, y_test, convert_dtype=datatype)
    cpu_r2 = r2_score(cpu_pred, y_test, convert_dtype=datatype)

    score_gap = abs(gpu_r2 - cpu_r2)
    try:
        assert score_gap <= 0.02
    except AssertionError:
        # A score mismatch is downgraded to an expected failure.
        pytest.xfail("failed due to AssertionError error, "
                     "fix will be merged soon")
Ejemplo n.º 2
0
def test_mbsgd_regressor_vs_skl(lrate, penalty, make_dataset):
    """Check ``cumlMBSGRegressor`` scores within 0.02 R^2 of ``SGDRegressor``.

    ``make_dataset`` supplies the row count, dtype and the train/test
    split. Both estimators are configured with the same learning-rate
    schedule (``lrate``), ``penalty``, ``eta0``, ``tol`` and intercept
    setting. Nothing is executed when the dataset has 500000 rows or more.
    """
    nrows, datatype, X_train, X_test, y_train, y_test = make_dataset

    # Guard clause: the comparison only runs below 500000 rows.
    if not nrows < 500000:
        return

    # Hyper-parameters shared by both estimators.
    shared = dict(learning_rate=lrate,
                  eta0=0.005,
                  fit_intercept=True,
                  tol=0.0,
                  penalty=penalty)

    gpu_model = cumlMBSGRegressor(epochs=100, batch_size=2, **shared)
    gpu_model.fit(X_train, y_train)
    gpu_pred = gpu_model.predict(X_test)
    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    gpu_r2 = r2_score(cp.asnumpy(gpu_pred),
                      cp.asnumpy(y_test),
                      convert_dtype=datatype)

    cpu_model = SGDRegressor(max_iter=100, random_state=0, **shared)
    # The reference estimator is fed arrays converted with cp.asnumpy,
    # with the training target flattened.
    cpu_model.fit(cp.asnumpy(X_train), cp.asnumpy(y_train).ravel())
    cpu_pred = cpu_model.predict(cp.asnumpy(X_test))
    cpu_r2 = r2_score(cpu_pred, cp.asnumpy(y_test), convert_dtype=datatype)

    assert abs(gpu_r2 - cpu_r2) <= 0.02
Ejemplo n.º 3
0
def test_mbsgd_regressor(datatype, lrate, input_type, penalty,
                         nrows, column_info):
    """Compare tuned ``cumlMBSGRegressor`` and ``SGDRegressor`` R^2 scores.

    A synthetic regression problem is generated, cast to ``datatype`` and
    split 80/20. Both estimators receive the same ``lrate`` schedule,
    ``penalty``, ``eta0``, ``tol`` and intercept setting, and their R^2
    scores must agree to within 0.02. ``input_type`` is accepted but not
    used in the body.
    """
    n_features, n_informative = column_info
    features, target = make_regression(n_samples=nrows,
                                       n_features=n_features,
                                       n_informative=n_informative,
                                       random_state=0)
    features = features.astype(datatype)
    target = target.astype(datatype)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, train_size=0.8, random_state=0)

    # Hyper-parameters shared by both estimators.
    shared = dict(learning_rate=lrate, eta0=0.005, fit_intercept=True,
                  tol=0.0, penalty=penalty)

    gpu_model = cumlMBSGRegressor(epochs=100, batch_size=2, **shared)
    gpu_model.fit(X_train, y_train)
    gpu_pred = gpu_model.predict(X_test).to_array()

    cpu_model = SGDRegressor(max_iter=100, random_state=0, **shared)
    cpu_model.fit(X_train, y_train)
    cpu_pred = cpu_model.predict(X_test)

    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    gpu_r2 = r2_score(gpu_pred, y_test, convert_dtype=datatype)
    cpu_r2 = r2_score(cpu_pred, y_test, convert_dtype=datatype)
    assert abs(gpu_r2 - cpu_r2) <= 0.02
def test_mbsgd_regressor(datatype, lrate, input_type, penalty, nrows, ncols):
    """Compare ``cumlMBSGRegressor`` and ``SGDRegressor`` R^2 scores.

    A synthetic regression problem with ``nrows`` samples and ``ncols``
    features is generated; the first 80% of the rows are used for training
    and the remainder for testing, all cast to ``datatype``. Both
    estimators receive the same ``lrate`` schedule, ``penalty``, ``eta0``,
    ``tol`` and intercept setting, and their R^2 scores must agree to
    within 0.02 in either direction. ``input_type`` is accepted but not
    used in the body.
    """
    train_rows = int(nrows * 0.8)
    X, y = make_regression(n_samples=nrows, n_features=ncols, random_state=0)
    X_test = np.array(X[train_rows:, :], dtype=datatype)
    X_train = np.array(X[:train_rows, :], dtype=datatype)
    y_train = np.array(y[:train_rows, ], dtype=datatype)
    y_test = np.array(y[train_rows:, ], dtype=datatype)

    cu_mbsgd_regressor = cumlMBSGRegressor(learning_rate=lrate,
                                           eta0=0.005,
                                           epochs=100,
                                           fit_intercept=True,
                                           batch_size=2,
                                           tol=0.0,
                                           penalty=penalty)

    cu_mbsgd_regressor.fit(X_train, y_train)
    cu_pred = cu_mbsgd_regressor.predict(X_test).to_array()

    skl_sgd_regressor = SGDRegressor(learning_rate=lrate,
                                     eta0=0.005,
                                     max_iter=100,
                                     fit_intercept=True,
                                     tol=0.0,
                                     penalty=penalty,
                                     random_state=0)

    skl_sgd_regressor.fit(X_train, y_train)
    skl_pred = skl_sgd_regressor.predict(X_test)

    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    cu_r2 = r2_score(cu_pred, y_test)
    skl_r2 = r2_score(skl_pred, y_test)
    # Two-sided tolerance: a signed difference would accept a cuML score
    # that is arbitrarily worse than the reference score.
    assert abs(cu_r2 - skl_r2) <= 0.02
Ejemplo n.º 5
0
def test_mbsgd_regressor_set_params():
    """Check ``set_params`` matches passing the same values to the constructor.

    Three estimators are fitted on the same ``y = 2 * x`` data: one with
    default arguments, one constructed with ``eta0=0.1`` and
    ``fit_intercept=False``, and one built with defaults and then
    reconfigured through ``set_params``. The default coefficients must
    differ from the constructor-configured ones, and the two configured
    variants must produce equal coefficients.
    """
    x = np.linspace(0, 1, 50)
    y = x * 2

    def fitted_coef(estimator):
        # Fit on the shared data and hand back the learned coefficients.
        estimator.fit(x, y)
        return estimator.coef_

    coef_before = fitted_coef(cumlMBSGRegressor())
    coef_after = fitted_coef(cumlMBSGRegressor(eta0=0.1, fit_intercept=False))

    reconfigured = cumlMBSGRegressor()
    reconfigured.set_params(eta0=0.1, fit_intercept=False)
    coef_test = fitted_coef(reconfigured)

    assert coef_before != coef_after
    assert coef_after == coef_test
Ejemplo n.º 6
0
def test_mbsgd_regressor_attributes():
    """Verify a fitted ``cumlMBSGRegressor`` exposes the expected attributes."""
    X, y = make_blobs()
    fitted = cumlMBSGRegressor()
    fitted.fit(X, y)

    expected_names = (
        "dtype", "solver_model", "coef_", "intercept_",
        "l1_ratio", "n_cols", "loss", "eta0", "batch_size",
        "epochs",
    )
    # Checked one by one so the first missing attribute stops the test.
    for name in expected_names:
        assert hasattr(fitted, name)
Ejemplo n.º 7
0
def test_mbsgd_regressor_default(make_dataset):
    """Check a near-default ``cumlMBSGRegressor`` reaches an R^2 above 0.9.

    ``make_dataset`` supplies the row count, dtype and the train/test
    split. The estimator is left at its defaults except for the batch
    size, which is set to one hundredth of the row count.
    """
    nrows, datatype, X_train, X_test, y_train, y_test = make_dataset

    # Integer division keeps batch_size an int (true division would pass a
    # float); the floor of 1 avoids a zero batch for fewer than 100 rows.
    batch_size = max(1, nrows // 100)

    cu_mbsgd_regressor = cumlMBSGRegressor(batch_size=batch_size)
    cu_mbsgd_regressor.fit(X_train, y_train)
    cu_pred = cu_mbsgd_regressor.predict(X_test)
    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    cu_r2 = r2_score(cp.asnumpy(cu_pred),
                     cp.asnumpy(y_test),
                     convert_dtype=datatype)

    assert cu_r2 > 0.9
Ejemplo n.º 8
0
def test_mbsgd_regressor_default(make_dataset):
    """Compare default ``cumlMBSGRegressor`` and ``SGDRegressor`` R^2 scores.

    ``make_dataset`` supplies the row count, dtype and the train/test
    split. The ``cumlMBSGRegressor`` is always fitted and scored; the
    ``SGDRegressor`` comparison (scores within 0.02) only runs when the
    dataset has fewer than 500000 rows.
    """
    nrows, datatype, X_train, X_test, y_train, y_test = make_dataset

    gpu_model = cumlMBSGRegressor()
    gpu_model.fit(X_train, y_train)
    gpu_pred = gpu_model.predict(X_test)
    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    gpu_r2 = r2_score(gpu_pred, y_test, convert_dtype=datatype)

    # Guard clause: the reference comparison is skipped at 500000+ rows.
    if not nrows < 500000:
        return

    cpu_model = SGDRegressor()
    cpu_model.fit(X_train, y_train)
    cpu_pred = cpu_model.predict(X_test)
    cpu_r2 = r2_score(cpu_pred, y_test, convert_dtype=datatype)
    assert abs(gpu_r2 - cpu_r2) <= 0.02
Ejemplo n.º 9
0
def test_mbsgd_regressor(lrate, penalty, make_dataset):
    """Check a tuned ``cumlMBSGRegressor`` reaches an R^2 of at least 0.9.

    ``make_dataset`` supplies the row count, dtype and the train/test
    split. The estimator is configured with the ``lrate`` schedule and
    ``penalty`` under test, 100 epochs, and a batch size of one hundredth
    of the row count.
    """
    nrows, datatype, X_train, X_test, y_train, y_test = make_dataset

    # Integer division keeps batch_size an int (true division would pass a
    # float); the floor of 1 avoids a zero batch for fewer than 100 rows.
    batch_size = max(1, nrows // 100)

    cu_mbsgd_regressor = cumlMBSGRegressor(learning_rate=lrate,
                                           eta0=0.005,
                                           epochs=100,
                                           fit_intercept=True,
                                           batch_size=batch_size,
                                           tol=0.0,
                                           penalty=penalty)

    cu_mbsgd_regressor.fit(X_train, y_train)
    cu_pred = cu_mbsgd_regressor.predict(X_test)
    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype)

    assert cu_r2 >= 0.9
Ejemplo n.º 10
0
def test_mbsgd_regressor_default(datatype, nrows,
                                 column_info):
    """Compare default ``cumlMBSGRegressor`` and ``SGDRegressor`` R^2 scores.

    A synthetic regression problem is generated from ``nrows`` and
    ``column_info`` (feature count, informative-feature count), cast to
    ``datatype`` and split 80/20. The ``cumlMBSGRegressor`` is always
    fitted and scored; the ``SGDRegressor`` comparison (scores within
    0.02) only runs when ``nrows`` is below 500000.
    """
    n_features, n_informative = column_info
    features, target = make_regression(n_samples=nrows,
                                       n_features=n_features,
                                       n_informative=n_informative,
                                       random_state=0)
    features = features.astype(datatype)
    target = target.astype(datatype)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, train_size=0.8, random_state=0)

    gpu_model = cumlMBSGRegressor()
    gpu_model.fit(X_train, y_train)
    gpu_pred = gpu_model.predict(X_test).to_array()
    # NOTE(review): predictions are passed as the first argument; confirm
    # this matches the argument order r2_score expects.
    gpu_r2 = r2_score(gpu_pred, y_test, convert_dtype=datatype)

    # Guard clause: the reference comparison is skipped at 500000+ rows.
    if not nrows < 500000:
        return

    cpu_model = SGDRegressor()
    cpu_model.fit(X_train, y_train)
    cpu_pred = cpu_model.predict(X_test)
    cpu_r2 = r2_score(cpu_pred, y_test, convert_dtype=datatype)
    assert abs(gpu_r2 - cpu_r2) <= 0.02