def test_bernoulli(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Check that BernoulliNB agrees with the scikit-learn reference model.

    Both estimators are fitted on the first 500 rows supplied by the
    ``nlp_20news`` fixture and are then compared on class counts, class log
    priors, predicted log-probabilities and the training-set score.

    Parameters
    ----------
    x_dtype : dtype the feature matrix is cast to.
    y_dtype : dtype the label vector is cast to.
    is_sparse : when false, the feature matrix is densified before fitting.
    nlp_20news : fixture unpacked as ``(features, labels)``.
    """
    row_limit = 500
    # Largest accepted absolute difference between the two accuracy scores.
    score_tol = 1e-3

    features, labels = nlp_20news

    # NOTE(review): sparse_scipy_to_cp presumably moves a SciPy sparse matrix
    # onto the device -- confirm against the helper's definition.
    features = sparse_scipy_to_cp(features, x_dtype).astype(x_dtype)
    labels = labels.astype(y_dtype)

    features = features.tocsr()[:row_limit]
    labels = labels[:row_limit]

    if not is_sparse:
        features = features.todense()

    reference = skBNB()
    candidate = BernoulliNB()

    # The reference model is fed the results of .get(); the model under test
    # consumes the arrays as they are.
    reference.fit(features.get(), labels.get())
    candidate.fit(features, labels)

    reference_score = reference.score(features.get(), labels.get())
    candidate_score = candidate.score(features, labels)

    candidate_log_proba = candidate.predict_log_proba(features).get()
    reference_log_proba = reference.predict_log_proba(features.get())

    assert_array_equal(reference.class_count_, candidate.class_count_.get())
    assert_allclose(
        reference.class_log_prior_,
        candidate.class_log_prior_.get(),
        rtol=1e-6,
    )
    assert_allclose(
        candidate_log_proba, reference_log_proba, atol=1e-2, rtol=1e-2
    )

    lowest_ok = reference_score - score_tol
    highest_ok = reference_score + score_tol
    assert lowest_ok <= candidate_score <= highest_ok
def test_bernoulli_partial_fit(x_dtype, y_dtype, nlp_20news):
    """Check incremental training of BernoulliNB against scikit-learn.

    The first 1500 rows supplied by the ``nlp_20news`` fixture are fed to
    both estimators through ``partial_fit`` in consecutive chunks of 500
    rows; afterwards the predictions over all retained rows must match.

    Parameters
    ----------
    x_dtype : dtype the feature matrix is cast to.
    y_dtype : dtype the label vector is cast to.
    nlp_20news : fixture unpacked as ``(features, labels)``.
    """
    chunk_size = 500
    row_limit = 1500

    features, labels = nlp_20news

    features = sparse_scipy_to_cp(features, x_dtype).astype(x_dtype)
    labels = labels.astype(y_dtype)[:row_limit]
    features = features.tocsr()[:row_limit]

    candidate = BernoulliNB()
    reference = skBNB()

    # Every chunk is announced the full label set, since a single chunk is
    # not guaranteed to contain all classes.
    classes = np.unique(labels)

    total_rows = features.shape[0]
    for start in range(0, total_rows, chunk_size):
        stop = start + chunk_size
        # A chunk that would run past the last row is taken open-ended
        # instead, i.e. everything from ``start`` onwards.
        window = slice(start, stop if stop <= total_rows else None)

        chunk_features = features[window]
        chunk_labels = labels[window]

        candidate.partial_fit(chunk_features, chunk_labels, classes=classes)
        reference.partial_fit(
            chunk_features.get(), chunk_labels.get(), classes=classes.get()
        )

    candidate_pred = candidate.predict(features).get()
    reference_pred = reference.predict(features.get())

    assert_allclose(candidate_pred, reference_pred)
from sklearn.metrics import classification_report
from sklearn.naive_bayes import BernoulliNB as skBNB

from smlib.bayes.nb import BernoulliNB

# Small binary training set: five samples with three features each.
X_train = np.array(
    [
        [1, 1, 0],
        [1, 0, 0],
        [1, 1, 1],
        [0, 0, 0],
        [0, 0, 1],
    ]
)
y_train = np.array([1, 0, 1, 0, 0])

# Two held-out samples together with their expected labels.
X_test = np.array(
    [
        [1, 1, 0],
        [0, 1, 0],
    ]
)
y_test = np.array([1, 0])

# Run the scikit-learn estimator and the smlib one through the same
# fit / predict / report sequence so their printed output can be compared.
for classifier in (skBNB(), BernoulliNB()):
    classifier.fit(X_train, y_train)
    print(classifier)

    expected = y_test
    predicted = classifier.predict(X_test)
    report = classification_report(expected, predicted)
    print(report)
    print('-' * 50)

# Second check, scikit-learn only: six random 0/1 rows of 100 features with
# multi-class labels, predicting the third row. The data is unseeded, so it
# differs from run to run.
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])

skbnb = skBNB()
skbnb.fit(X, Y)

third_row = X[2:3]
print(skbnb.predict(third_row))