Example no. 1
0
def test_gaussian_parameters(priors, var_smoothing, nlp_20news):
    """Compare learned GaussianNB parameters and predictions with sklearn
    under several prior configurations ('balanced', 'unbalanced', or None)."""
    n_rows = 150

    X, y = nlp_20news

    # Densify a small slice of the corpus and cast to the working dtypes.
    X = sparse_scipy_to_cp(X[:n_rows], cp.float32).todense()
    y = y.astype(cp.int32)[:n_rows]

    # Materialize the requested prior vector (20 newsgroup classes).
    if priors == 'balanced':
        priors = cp.array([1 / 20] * 20)
    elif priors == 'unbalanced':
        priors = cp.linspace(0.01, 0.09, 20)

    # sklearn needs host-resident priors; cuML takes the device array.
    sk_priors = None if priors is None else priors.get()

    model = GaussianNB(priors=priors, var_smoothing=var_smoothing)
    model_sk = skGNB(priors=sk_priors, var_smoothing=var_smoothing)
    model.fit(X, y)
    model_sk.fit(X.get(), y.get())

    y_hat = cp.asnumpy(model.predict(X))
    y_hat_sk = model_sk.predict(X.get())
    y = cp.asnumpy(y)

    # epsilon_ is the smoothing term added to variances; it should agree
    # closely, and the hard predictions should match exactly.
    assert_allclose(model.epsilon_.get(), model_sk.epsilon_, rtol=1e-4)
    assert_array_equal(y_hat, y_hat_sk)
Example no. 2
0
def test_gaussian_fit_predict(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Fit/predict round-trip on 20newsgroups, exercising both the sparse
    (CSR) and dense input paths of GaussianNB."""
    X, y = nlp_20news
    model = GaussianNB()
    n_rows = 500
    n_cols = int(2e5)

    # Trim the corpus so the dense path stays memory-friendly.
    X = sparse_scipy_to_cp(X, x_dtype).tocsr()[:n_rows, :n_cols]
    y = y.astype(y_dtype)[:n_rows]

    if is_sparse:
        model.fit(X, y)
    else:
        X = X.todense()
        # Fit from contiguous host memory to exercise the numpy input path.
        model.fit(np.ascontiguousarray(cp.asnumpy(X).astype(x_dtype)), y)

    y_hat = cp.asnumpy(model.predict(X))
    y = cp.asnumpy(y)

    # On this separable subset the classifier should be near-perfect.
    assert accuracy_score(y, y_hat) >= 0.99
Example no. 3
0
def test_gaussian_basic():
    """Six trivially separable points: learned parameters, predictions,
    and (log-)probabilities must all agree with scikit-learn."""
    # Two well-separated clusters of three points each.
    X = cp.array(
        [[-2, -1, -1], [-1, -1, -1], [-1, -2, -1],
         [1, 1, 1], [1, 2, 1], [2, 1, 1]],
        dtype=cp.float32,
    )
    y = cp.array([1, 1, 1, 2, 2, 2])

    skclf = skGNB()
    skclf.fit(X.get(), y.get())

    clf = GaussianNB()
    clf.fit(X, y)

    # Per-class means and variances should agree to ~6 decimal places.
    assert_array_almost_equal(clf.theta_.get(), skclf.theta_, 6)
    assert_array_almost_equal(clf.sigma_.get(), skclf.sigma_, 6)

    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    y_pred_proba_sk = skclf.predict_proba(X.get())
    y_pred_log_proba_sk = skclf.predict_log_proba(X.get())

    # Hard labels match exactly; probabilities match tightly, log-probs
    # only loosely (they diverge in the tails).
    assert_array_equal(y_pred.get(), y.get())
    assert_array_almost_equal(y_pred_proba.get(), y_pred_proba_sk, 8)
    assert_allclose(y_pred_log_proba.get(), y_pred_log_proba_sk,
                    atol=1e-2, rtol=1e-2)
Example no. 4
0
def test_gaussian_partial_fit(nlp_20news):
    """Incrementally fit GaussianNB in fixed-size chunks and check accuracy.

    Also verifies the error contract of ``partial_fit``:
    * labels outside ``classes`` raise ``ValueError``
    * omitting ``classes`` on the first call raises ``ValueError``
    """
    chunk_size = 250
    n_rows = 1500
    x_dtype, y_dtype = cp.float32, cp.int32

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).tocsr()[:n_rows]
    y = y.astype(y_dtype)[:n_rows]

    model = GaussianNB()

    # classes must be given on the first partial_fit call.
    classes = np.unique(y)

    # Feed the data in fixed-size chunks; slicing clamps at the end, so the
    # final chunk may simply be shorter. (Replaces the previous manual
    # upper-bound/-1 bookkeeping, whose unused `total_fit` counter computed
    # a negative delta on the last chunk.)
    for start in range(0, X.shape[0], chunk_size):
        stop = start + chunk_size
        model.partial_fit(X[start:stop], y[start:stop], classes=classes)

    y_hat = cp.asnumpy(model.predict(X))
    y = cp.asnumpy(y)
    assert accuracy_score(y, y_hat) >= 0.99

    # A label mismatch between target y and `classes` must raise.
    assert_raises(ValueError,
                  GaussianNB().partial_fit,
                  X,
                  y,
                  classes=cp.array([0, 1]))
    # Raise because classes is required on first call of partial_fit
    assert_raises(ValueError, GaussianNB().partial_fit, X, y)