Ejemplo n.º 1
0
def test_mbsgd_classifier_default(datatype, nrows, column_info):
    """Check default-configured cuML MBSGD against scikit-learn's SGD.

    A synthetic classification problem is generated, cast to ``datatype``
    and split with ``train_size=0.8``.  Both estimators are fitted with
    their default hyper-parameters on the training part, and the cuML
    accuracy on the held-out part must be no more than 0.05 below the
    scikit-learn accuracy.

    Parameters
    ----------
    datatype
        dtype the features and labels are cast to.
    nrows
        Number of samples to generate.
    column_info
        Pair ``(ncols, n_info)``: total number of features and number of
        informative features passed to ``make_classification``.
    """
    ncols, n_info = column_info
    X, y = make_classification(n_samples=nrows,
                               n_informative=n_info,
                               n_features=ncols,
                               random_state=0)
    X = X.astype(datatype)
    y = y.astype(datatype)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.8,
                                                        random_state=0)

    y_train = y_train.astype(datatype)
    y_test = y_test.astype(datatype)

    cu_mbsgd_classifier = cumlMBSGClassifier()

    cu_mbsgd_classifier.fit(X_train, y_train)
    cu_pred = cu_mbsgd_classifier.predict(X_test).to_array()

    # Seed the baseline so the reference accuracy is reproducible; an
    # unseeded SGDClassifier makes the threshold below move between runs.
    skl_sgd_classifier = SGDClassifier(random_state=0)

    skl_sgd_classifier.fit(X_train, y_train)
    skl_pred = skl_sgd_classifier.predict(X_test)

    cu_acc = accuracy_score(cu_pred, y_test)
    skl_acc = accuracy_score(skl_pred, y_test)
    assert cu_acc >= skl_acc - 0.05
Ejemplo n.º 2
0
def test_mbsgd_classifier_vs_skl(lrate, penalty, loss, make_dataset):
    """Compare cuML MBSGD accuracy with scikit-learn's SGD on one dataset.

    ``make_dataset`` is unpacked as
    ``(nrows, X_train, X_test, y_train, y_test)``.  When ``nrows`` is
    500000 or more nothing is fitted and nothing is asserted.  Otherwise
    both estimators are built with matching settings and the cuML accuracy
    must be no more than 0.08 below the scikit-learn accuracy.

    NOTE(review): ``loss`` is accepted but never forwarded to either
    estimator -- confirm whether that is intentional.
    """
    nrows, X_train, X_test, y_train, y_test = make_dataset

    # Large datasets skip the comparison entirely.
    if nrows >= 500000:
        return

    # Hyper-parameters common to both implementations.
    shared = dict(learning_rate=lrate,
                  eta0=0.005,
                  fit_intercept=True,
                  tol=0.0,
                  penalty=penalty)

    cu_model = cumlMBSGClassifier(epochs=100, batch_size=2, **shared)
    cu_model.fit(X_train, y_train)
    cu_pred = cu_model.predict(X_test)

    # accuracy_score and scikit-learn are fed host copies of the data.
    y_test_host = cp.asnumpy(y_test)
    cu_acc = accuracy_score(cp.asnumpy(cu_pred), y_test_host)

    skl_model = SGDClassifier(max_iter=100, random_state=0, **shared)
    skl_model.fit(cp.asnumpy(X_train), cp.asnumpy(y_train))
    skl_pred = skl_model.predict(cp.asnumpy(X_test))
    skl_acc = accuracy_score(skl_pred, y_test_host)

    assert cu_acc >= skl_acc - 0.08
Ejemplo n.º 3
0
def test_mbsgd_classifier(datatype, lrate, input_type, penalty,
                          loss, nrows, column_info):
    """Fit cuML MBSGD on synthetic data and compare with scikit-learn.

    The data comes from ``make_classification``, is cast to ``datatype``
    and split with ``train_size=0.8``.  The cuML model is always fitted
    and scored; the scikit-learn comparison (cuML accuracy no more than
    0.06 below scikit-learn's) only runs when ``nrows`` is below 500000.

    ``column_info`` is a pair ``(ncols, n_info)``: total feature count and
    number of informative features.

    NOTE(review): ``input_type`` and ``loss`` are accepted but not used in
    the body -- confirm whether that is intentional.
    """
    ncols, n_info = column_info
    features, labels = make_classification(n_samples=nrows,
                                           n_informative=n_info,
                                           n_features=ncols,
                                           random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        features.astype(datatype),
        labels.astype(datatype),
        train_size=0.8,
        random_state=10)

    # Hyper-parameters common to both implementations.
    shared = dict(learning_rate=lrate, eta0=0.005, fit_intercept=True,
                  tol=0.0, penalty=penalty)

    cu_model = cumlMBSGClassifier(epochs=100, batch_size=2, **shared)
    cu_model.fit(X_train, y_train)
    cu_acc = accuracy_score(cu_model.predict(X_test).to_array(), y_test)

    # For large inputs there is no baseline comparison and no assertion.
    if nrows >= 500000:
        return

    skl_model = SGDClassifier(max_iter=100, random_state=0, **shared)
    skl_model.fit(X_train, y_train)
    skl_acc = accuracy_score(skl_model.predict(X_test), y_test)

    assert cu_acc >= skl_acc - 0.06
Ejemplo n.º 4
0
def test_mbsgd_classifier_default(make_dataset):
    """Check that near-default cuML MBSGD reaches at least 0.69 accuracy.

    ``make_dataset`` is unpacked as
    ``(nrows, X_train, X_test, y_train, y_test)``.  Only ``batch_size`` is
    overridden, to one tenth of ``nrows``; predictions and labels are
    copied to the host with ``cp.asnumpy`` before scoring.
    """
    nrows, X_train, X_test, y_train, y_test = make_dataset

    # Floor division keeps batch_size an integer sample count; true
    # division would hand the estimator a float.
    cu_mbsgd_classifier = cumlMBSGClassifier(batch_size=nrows // 10)

    cu_mbsgd_classifier.fit(X_train, y_train)
    cu_pred = cu_mbsgd_classifier.predict(X_test)
    cu_acc = accuracy_score(cp.asnumpy(cu_pred), cp.asnumpy(y_test))

    assert cu_acc >= 0.69
Ejemplo n.º 5
0
def test_mbsgd_classifier_set_params():
    """Check that ``set_params`` configures the model like the constructor.

    Three models are fitted on the same 50-point, single-column data set:
    one with defaults, one built with ``epochs=20, loss='hinge'``, and one
    built with defaults and then reconfigured through ``set_params`` with
    the same two values.  The default coefficients must differ from the
    reconfigured ones, and the two reconfigured models must agree.

    NOTE(review): the assertions rely on ``coef_`` supporting ``!=`` and
    ``==`` in a boolean context -- confirm for the returned array type.
    """
    features = np.linspace(0, 1, 50)
    labels = (features > 0.5).astype(cp.int32)
    overrides = {'epochs': 20, 'loss': 'hinge'}

    def fitted_coef(estimator):
        # Fit on the shared data and hand back the learned coefficients.
        estimator.fit(features, labels)
        return estimator.coef_

    coef_before = fitted_coef(cumlMBSGClassifier())
    coef_after = fitted_coef(cumlMBSGClassifier(**overrides))

    reconfigured = cumlMBSGClassifier()
    reconfigured.set_params(**overrides)
    coef_test = fitted_coef(reconfigured)

    assert coef_before != coef_after
    assert coef_after == coef_test
Ejemplo n.º 6
0
def test_mbsgd_classifier_default(make_dataset):
    """Check that default cuML MBSGD reaches at least 0.69 accuracy.

    ``make_dataset`` is unpacked as
    ``(nrows, X_train, X_test, y_train, y_test)``; the row count is not
    needed here.  Predictions are passed to ``accuracy_score`` as returned
    by ``predict``, without any conversion.
    """
    _, X_train, X_test, y_train, y_test = make_dataset

    model = cumlMBSGClassifier()
    model.fit(X_train, y_train)

    accuracy = accuracy_score(model.predict(X_test), y_test)
    assert accuracy >= 0.69
Ejemplo n.º 7
0
def test_mbsgd_classifier_attributes():
    """Check that a fitted cuML MBSGD classifier exposes expected attributes.

    The model is fitted on the output of ``make_blobs()`` called with its
    defaults, then each expected attribute name is checked with
    ``hasattr``.
    """
    expected = (
        "dtype", "solver_model", "coef_", "intercept_",
        "l1_ratio", "n_cols", "eta0", "batch_size",
        "fit_intercept", "penalty",
    )

    data, target = make_blobs()
    model = cumlMBSGClassifier()
    model.fit(data, target)

    for name in expected:
        assert hasattr(model, name)
Ejemplo n.º 8
0
def test_mbsgd_classifier(lrate, penalty, loss, make_dataset):
    """Check that tuned cuML MBSGD exceeds 0.79 accuracy.

    ``make_dataset`` is unpacked as
    ``(nrows, X_train, X_test, y_train, y_test)``.  The model uses a batch
    of one hundredth of ``nrows``; predictions and labels are copied to
    the host with ``cp.asnumpy`` before scoring.

    NOTE(review): ``loss`` is accepted but never forwarded to the
    estimator -- confirm whether that is intentional.
    """
    nrows, X_train, X_test, y_train, y_test = make_dataset

    # Floor division keeps batch_size an integer sample count; true
    # division would hand the estimator a float.
    cu_mbsgd_classifier = cumlMBSGClassifier(learning_rate=lrate, eta0=0.005,
                                             epochs=100, fit_intercept=True,
                                             batch_size=nrows // 100, tol=0.0,
                                             penalty=penalty)

    cu_mbsgd_classifier.fit(X_train, y_train)
    cu_pred = cu_mbsgd_classifier.predict(X_test)
    cu_acc = accuracy_score(cp.asnumpy(cu_pred), cp.asnumpy(y_test))

    assert cu_acc > 0.79
Ejemplo n.º 9
0
def test_mbsgd_classifier_default(make_dataset):
    """Check default-configured cuML MBSGD against scikit-learn's SGD.

    ``make_dataset`` is unpacked as
    ``(nrows, X_train, X_test, y_train, y_test)``.  The cuML model is
    always fitted and scored.  Only when ``nrows`` is below 500000 is a
    scikit-learn baseline fitted; the cuML accuracy must then be no more
    than 0.05 below the baseline.  For larger inputs nothing is asserted.
    """
    nrows, X_train, X_test, y_train, y_test = make_dataset

    cu_mbsgd_classifier = cumlMBSGClassifier()

    cu_mbsgd_classifier.fit(X_train, y_train)
    cu_pred = cu_mbsgd_classifier.predict(X_test).to_array()
    cu_acc = accuracy_score(cu_pred, y_test)

    if nrows < 500000:
        # Seed the baseline so the reference accuracy is reproducible; an
        # unseeded SGDClassifier makes the threshold move between runs.
        skl_sgd_classifier = SGDClassifier(random_state=0)

        skl_sgd_classifier.fit(X_train, y_train)
        skl_pred = skl_sgd_classifier.predict(X_test)
        skl_acc = accuracy_score(skl_pred, y_test)
        assert cu_acc >= skl_acc - 0.05
def test_mbsgd_classifier(datatype, lrate, input_type, penalty, loss, nrows,
                          ncols):
    """Fit cuML MBSGD on synthetic data and compare with scikit-learn.

    The data comes from ``make_classification``; the first 80% of the rows
    form the training set and the remainder the test set, all cast to
    ``datatype``.  Both estimators are fitted with matching settings, and
    the cuML accuracy must be no more than 0.02 below the scikit-learn
    accuracy.

    NOTE(review): ``input_type`` and ``loss`` are accepted but not used in
    the body -- confirm whether that is intentional.
    """

    train_rows = int(nrows * 0.8)
    X, y = make_classification(n_samples=nrows,
                               n_features=ncols,
                               random_state=0)
    X_test = np.array(X[train_rows:, :], dtype=datatype)
    X_train = np.array(X[:train_rows, :], dtype=datatype)
    y_train = np.array(y[:train_rows, ], dtype=datatype)
    y_test = np.array(y[train_rows:, ], dtype=datatype)

    cu_mbsgd_classifier = cumlMBSGClassifier(learning_rate=lrate,
                                             eta0=0.005,
                                             epochs=100,
                                             fit_intercept=True,
                                             batch_size=2,
                                             tol=0.0,
                                             penalty=penalty)

    cu_mbsgd_classifier.fit(X_train, y_train)
    cu_pred = cu_mbsgd_classifier.predict(X_test).to_array()

    skl_sgd_classifier = SGDClassifier(learning_rate=lrate,
                                       eta0=0.005,
                                       max_iter=100,
                                       fit_intercept=True,
                                       tol=0.0,
                                       penalty=penalty,
                                       random_state=0)

    skl_sgd_classifier.fit(X_train, y_train)
    skl_pred = skl_sgd_classifier.predict(X_test)

    # These are accuracies (higher is better), not errors.
    cu_acc = accuracy_score(cu_pred, y_test)
    skl_acc = accuracy_score(skl_pred, y_test)

    # Fail when cuML is worse than the baseline by more than the tolerance.
    # The previous check (cu - skl <= 0.02) passed for any worse cuML
    # accuracy and only failed when cuML was better.
    # NOTE(review): the 0.02 tolerance is carried over unchanged -- confirm
    # it is achievable now that the comparison points the right way.
    assert cu_acc >= skl_acc - 0.02