# Example #1
0
def test_bernoulli(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Fit ``BernoulliNB`` and ``skBNB`` on the same rows and compare them.

    Both estimators are trained and evaluated on the first 500 rows of the
    ``nlp_20news`` data. Class counts must be equal, class log-priors and
    predicted log-probabilities must agree within tolerance, and the two
    ``score()`` results must lie within 1e-3 of each other.

    Parameters
    ----------
    x_dtype
        dtype the feature matrix is converted and cast to.
    y_dtype
        dtype the label vector is cast to.
    is_sparse
        When false, the CSR feature matrix is densified with ``todense()``
        before being handed to the models.
    nlp_20news
        Fixture unpacked as a ``(features, labels)`` pair.
    """
    n_samples = 500
    score_tol = 1e-3

    features, labels = nlp_20news

    # Convert/cast the inputs, then keep only the leading rows.
    features = sparse_scipy_to_cp(features, x_dtype).astype(x_dtype)
    features = features.tocsr()[:n_samples]
    labels = labels.astype(y_dtype)[:n_samples]

    if not is_sparse:
        features = features.todense()

    reference = skBNB()
    candidate = BernoulliNB()

    # NOTE(review): ``.get()`` presumably copies a device array to the host
    # (CuPy convention) so the reference model sees host data -- confirm.
    reference.fit(features.get(), labels.get())
    candidate.fit(features, labels)

    reference_score = reference.score(features.get(), labels.get())
    candidate_score = candidate.score(features, labels)
    candidate_log_proba = candidate.predict_log_proba(features).get()
    reference_log_proba = reference.predict_log_proba(features.get())

    assert_array_equal(reference.class_count_, candidate.class_count_.get())
    # The third positional argument (1e-6) is the relative tolerance if this
    # is numpy.testing.assert_allclose -- TODO confirm against the import.
    assert_allclose(reference.class_log_prior_,
                    candidate.class_log_prior_.get(), 1e-6)
    assert_allclose(candidate_log_proba, reference_log_proba,
                    atol=1e-2, rtol=1e-2)

    lowest_ok = reference_score - score_tol
    highest_ok = reference_score + score_tol
    assert lowest_ok <= candidate_score <= highest_ok
# Example #2
0
def test_bernoulli_partial_fit(x_dtype, y_dtype, nlp_20news):
    """Train ``BernoulliNB`` and ``skBNB`` incrementally and compare predictions.

    The first 1500 rows of the ``nlp_20news`` data are fed to both models in
    consecutive batches of up to 500 rows through ``partial_fit``. Afterwards
    both models predict on the full slice and the predictions are compared
    with ``assert_allclose``.

    Parameters
    ----------
    x_dtype
        dtype the feature matrix is converted and cast to.
    y_dtype
        dtype the label vector is cast to.
    nlp_20news
        Fixture unpacked as a ``(features, labels)`` pair.
    """
    batch_rows = 500
    total_rows = 1500

    features, labels = nlp_20news

    features = sparse_scipy_to_cp(features, x_dtype).astype(x_dtype)
    features = features.tocsr()[:total_rows]
    labels = labels.astype(y_dtype)[:total_rows]

    candidate = BernoulliNB()
    reference = skBNB()

    # Every batch is told the complete label set, since a single batch may
    # not contain every class.
    classes = np.unique(labels)

    n_available = features.shape[0]
    for start in range(0, n_available, batch_rows):
        stop = start + batch_rows
        if stop > n_available:
            # Final, short batch: slice open-ended to the end of the data.
            stop = None

        x_batch = features[start:stop]
        y_batch = labels[start:stop]

        candidate.partial_fit(x_batch, y_batch, classes=classes)
        # NOTE(review): ``.get()`` presumably moves device arrays to the host
        # for the reference model -- confirm.
        reference.partial_fit(x_batch.get(), y_batch.get(),
                              classes=classes.get())

    candidate_pred = candidate.predict(features).get()
    reference_pred = reference.predict(features.get())

    assert_allclose(candidate_pred, reference_pred)
from sklearn.metrics import classification_report
from sklearn.naive_bayes import BernoulliNB as skBNB
from smlib.bayes.nb import BernoulliNB

# Small binary-feature training set: five samples with three features each.
X_train = np.array([
    [1, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
    [0, 0, 0],
    [0, 0, 1],
])
y_train = np.array([1, 0, 1, 0, 0])

# Two evaluation samples and the labels they are compared against.
X_test = np.array([[1, 1, 0], [0, 1, 0]])
y_test = np.array([1, 0])

# Fit the scikit-learn estimator and the smlib estimator on identical data
# and print a classification report for each.
for classifier in (skBNB(), BernoulliNB()):
    classifier.fit(X_train, y_train)
    print(classifier)

    expected, predicted = y_test, classifier.predict(X_test)
    print(classification_report(expected, predicted))

print('-' * 50)

# Fit scikit-learn's estimator on random 0/1 data (6 samples x 100 features,
# unseeded, so output varies between runs) and predict the third sample.
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
skbnb = skBNB()
skbnb.fit(X, Y)
print(skbnb.predict(X[2:3]))