Beispiel #1
0
def test_gaussian_fit_predict(x_dtype, y_dtype, is_sparse, nlp_20news):
    """
    Fit GaussianNB on a slice of the fixture data and require that it
    predicts its own training labels with an accuracy of at least 0.99.

    Parameters
    ----------
    x_dtype : dtype the feature matrix is converted to
    y_dtype : dtype the labels are cast to
    is_sparse : when true the model is fit on the sparse CSR slice,
        otherwise on a densified, C-contiguous host copy
    nlp_20news : fixture yielding an ``(X, y)`` pair
        (presumably the 20 newsgroups data -- confirm against the fixture)
    """
    row_limit, col_limit = 500, int(2e5)

    features, labels = nlp_20news
    features = sparse_scipy_to_cp(features, x_dtype).tocsr()
    features = features[:row_limit, :col_limit]

    classifier = GaussianNB()

    if not is_sparse:
        features = features.todense()
        labels = labels[:row_limit].astype(y_dtype)
        # The dense path fits on a host array; prediction below still
        # uses the dense `features` object produced by todense().
        host_features = cp.asnumpy(features).astype(x_dtype)
        classifier.fit(np.ascontiguousarray(host_features), labels)
    else:
        labels = labels.astype(y_dtype)[:row_limit]
        classifier.fit(features, labels)

    predicted = cp.asnumpy(classifier.predict(features))
    expected = cp.asnumpy(labels)

    assert accuracy_score(expected, predicted) >= 0.99
Beispiel #2
0
def test_gaussian_parameters(priors, var_smoothing, nlp_20news):
    """
    Check that GaussianNB matches skGNB for the given hyper-parameters.

    Both estimators are built with the same ``priors`` and
    ``var_smoothing``, fit on the same first ``nrows`` samples, and must
    agree on ``epsilon_`` (within rtol=1e-4) and on every predicted label.

    Parameters
    ----------
    priors : 'balanced' is replaced by 20 equal priors, 'unbalanced' by
        20 linearly spaced priors from 0.01 to 0.09; any other value is
        passed through unchanged (``None`` means no priors)
    var_smoothing : forwarded to both estimators
    nlp_20news : fixture yielding an ``(X, y)`` pair
    """
    x_dtype = cp.float32
    y_dtype = cp.int32
    nrows = 150

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X[:nrows], x_dtype).todense()
    y = y.astype(y_dtype)[:nrows]

    if priors == 'balanced':
        priors = cp.array([1 / 20] * 20)
    elif priors == 'unbalanced':
        priors = cp.linspace(0.01, 0.09, 20)

    model = GaussianNB(priors=priors, var_smoothing=var_smoothing)
    # The reference estimator gets a host copy of the priors (or None).
    model_sk = skGNB(priors=priors.get() if priors is not None else None,
                     var_smoothing=var_smoothing)
    model.fit(X, y)

    # Copy the features to the host once and reuse the copy for both the
    # reference fit and the reference predict.
    X_host = X.get()
    model_sk.fit(X_host, y.get())

    y_hat = cp.asnumpy(model.predict(X))
    y_hat_sk = model_sk.predict(X_host)

    assert_allclose(model.epsilon_.get(), model_sk.epsilon_, rtol=1e-4)
    assert_array_equal(y_hat, y_hat_sk)
Beispiel #3
0
def test_gaussian_basic():
    """
    Compare GaussianNB with skGNB on a tiny, linearly separable data set.

    Checks the fitted per-class means and variances, the predicted
    labels, and the predicted (log-)probabilities against the reference
    estimator fit on the same data.
    """
    # Data is just 6 separable points with three coordinates each
    X = cp.array([[-2, -1, -1], [-1, -1, -1], [-1, -2, -1], [1, 1, 1],
                  [1, 2, 1], [2, 1, 1]],
                 dtype=cp.float32)
    y = cp.array([1, 1, 1, 2, 2, 2])

    skclf = skGNB()
    skclf.fit(X.get(), y.get())

    clf = GaussianNB()
    clf.fit(X, y)

    # scikit-learn renamed ``sigma_`` to ``var_`` (deprecated in 1.0,
    # removed in 1.2); prefer the new name and fall back for old releases.
    sk_variance = skclf.var_ if hasattr(skclf, "var_") else skclf.sigma_

    assert_array_almost_equal(clf.theta_.get(), skclf.theta_, 6)
    assert_array_almost_equal(clf.sigma_.get(), sk_variance, 6)

    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    y_pred_proba_sk = skclf.predict_proba(X.get())
    y_pred_log_proba_sk = skclf.predict_log_proba(X.get())

    # The data is separable, so training labels must be recovered exactly.
    assert_array_equal(y_pred.get(), y.get())
    assert_array_almost_equal(y_pred_proba.get(), y_pred_proba_sk, 8)
    # Log-probabilities are compared with a looser tolerance than the
    # probabilities themselves.
    assert_allclose(y_pred_log_proba.get(),
                    y_pred_log_proba_sk,
                    atol=1e-2,
                    rtol=1e-2)