def test_basic_fit_predict_sparse(x_dtype, y_dtype, nlp_20news):
    """
    Fit MultinomialNB on the converted feature matrix and check the accuracy
    of predictions made on that same data.

    The features from ``nlp_20news`` are passed through
    ``sparse_scipy_to_cp`` and cast to ``x_dtype``; the labels are cast to
    ``y_dtype``. The model is fitted twice: once as a warm-up and once inside
    a CuPy profiler time range.
    """
    features, labels = nlp_20news

    features = sparse_scipy_to_cp(features, x_dtype).astype(x_dtype)
    labels = labels.astype(y_dtype)

    # Warm-up fit: priming seems to lower the end-to-end runtime of the
    # profiled fit below.
    clf = MultinomialNB()
    clf.fit(features, labels)

    # Wait for the warm-up work on the null stream before the profiled range.
    cp.cuda.Stream.null.synchronize()

    with cp.prof.time_range(message="start", color_id=10):
        clf = MultinomialNB()
        clf.fit(features, labels)

    # Bring predictions and labels back to NumPy before scoring.
    predicted = cp.asnumpy(clf.predict(features))
    expected = cp.asnumpy(labels)

    assert accuracy_score(expected, predicted) >= 0.924
def test_naive_bayes(nlp_20news):
    """
    Smoke-test the MultinomialNB API while ``dummy_allocator`` is installed
    through ``cupy_using_allocator``.

    No result is checked; the test passes as long as fitting, the prediction
    methods and ``score`` all complete without raising.
    """
    features, labels = nlp_20news

    features = sparse_scipy_to_cp(features, cp.float32).astype(cp.float32)
    labels = labels.astype(cp.int32)

    with cupy_using_allocator(dummy_allocator):
        clf = MultinomialNB()
        clf.fit(features, labels)

        # ``predict`` is deliberately invoked twice, matching the original
        # call sequence; each result replaces the previous one.
        inference_calls = (
            clf.predict,
            clf.predict,
            clf.predict_proba,
            clf.predict_log_proba,
        )
        for run_inference in inference_calls:
            result = run_inference(features)

        result = clf.score(features, labels)

        del result
def test_sparse_integral_dtype_fails(x_dtype, y_dtype, nlp_20news):
    """
    Check that MultinomialNB raises ``ValueError`` for features cast to
    ``x_dtype``, both in ``fit`` and in ``predict``.

    Between the two failure checks the model is fitted on a float32 copy of
    the features, so that ``predict`` is exercised on a fitted model.
    """
    features, labels = nlp_20news

    rejected = features.astype(x_dtype)
    labels = labels.astype(y_dtype)

    clf = MultinomialNB()

    # Fitting on the ``x_dtype`` features must be refused.
    with pytest.raises(ValueError):
        clf.fit(rejected, labels)

    # A float32 version of the same features is expected to fit normally.
    accepted = rejected.astype(cp.float32)
    clf.fit(accepted, labels)

    # Cast back to ``x_dtype``: prediction must be refused as well.
    rejected = accepted.astype(x_dtype)

    with pytest.raises(ValueError):
        clf.predict(rejected)
def test_basic_fit_predict_dense_numpy(x_dtype, y_dtype, nlp_20news):
    """
    Fit MultinomialNB on a dense NumPy array and check the accuracy of
    predictions made on the matching dense CuPy array.

    Only the first 500 rows of the data are used. The model is fitted on a
    C-contiguous NumPy copy of the features cast to ``x_dtype``, and then
    predicts on the float32 device array those features were copied from.
    """
    X, y = nlp_20news
    n_rows = 500

    X = sparse_scipy_to_cp(X, cp.float32)
    y = y.astype(y_dtype)

    # Densify only a small slice of the rows.
    X = X.tocsr()[0:n_rows].todense()
    y = y[:n_rows]

    model = MultinomialNB()
    model.fit(np.ascontiguousarray(cp.asnumpy(X).astype(x_dtype)), y)

    y_hat = model.predict(X)

    y_hat = cp.asnumpy(y_hat)
    y = cp.asnumpy(y)

    # The original evaluated this comparison without ``assert`` and threw the
    # result away, so the accuracy threshold was never enforced.
    assert accuracy_score(y, y_hat) >= 0.911
def test_multinomial_basic_fit_predict_dense_numpy(x_dtype, y_dtype, nlp_20news):
    """
    Compare cuML MultinomialNB predictions with those of the ``skNB``
    reference model on the first 500 rows of the data.

    The cuML model is fitted on a dense, C-contiguous NumPy copy of the
    features cast to ``x_dtype``; the reference model is fitted on the host
    copy of the same rows. Both predict on those rows and the predicted
    labels must match.
    """
    X, y = nlp_20news
    n_rows = 500

    X = sparse_scipy_to_cp(X, cp.float32).tocsr()[:n_rows]
    y = y[:n_rows].astype(y_dtype)

    model = MultinomialNB()
    model.fit(np.ascontiguousarray(cp.asnumpy(X.todense()).astype(x_dtype)), y)

    y_hat = model.predict(X).get()

    # Host-side copy of the features, shared by the reference fit/predict.
    X_host = X.get()

    modelsk = skNB()
    modelsk.fit(X_host, y.get())

    # Predict with the reference model. The original called ``model.predict``
    # here, which compared cuML against itself and left ``modelsk`` unused.
    y_sk = modelsk.predict(X_host)

    assert_allclose(y_hat, y_sk)
def test_predict_log_proba(x_dtype, y_dtype, nlp_20news):
    """
    Check that ``predict_log_proba`` from cuML MultinomialNB agrees with the
    ``skNB`` reference model within absolute and relative tolerances of 1e-2.

    The cuML model works on the converted CSR features cast to ``x_dtype``
    and labels cast to ``y_dtype``; the reference model works on the
    original features and the host copy of the labels.
    """
    host_X, labels = nlp_20news

    # Device-side inputs for cuML.
    gpu_X = sparse_scipy_to_cp(host_X, x_dtype).astype(x_dtype).tocsr()
    gpu_y = labels.astype(y_dtype)

    # Host-side labels for the reference model.
    host_y = labels.get()

    gpu_nb = MultinomialNB()
    ref_nb = skNB()

    gpu_nb.fit(gpu_X, gpu_y)
    ref_nb.fit(host_X, host_y)

    gpu_log_proba = gpu_nb.predict_log_proba(gpu_X).get()
    ref_log_proba = ref_nb.predict_log_proba(host_X)

    assert_allclose(gpu_log_proba, ref_log_proba, atol=1e-2, rtol=1e-2)
def test_score(x_dtype, y_dtype, nlp_20news):
    """
    Check that ``score`` from cuML MultinomialNB lies within 1e-4 of the
    score reported by the ``skNB`` reference model.

    Each model is fitted and scored on its own copy of the data: converted
    CSR features and cast labels for cuML, the original features and host
    labels for the reference.
    """
    host_X, labels = nlp_20news

    # Device-side inputs for cuML.
    gpu_X = sparse_scipy_to_cp(host_X, x_dtype).astype(x_dtype).tocsr()
    gpu_y = labels.astype(y_dtype)

    # Host-side labels for the reference model.
    host_y = labels.get()

    gpu_nb = MultinomialNB()
    ref_nb = skNB()

    gpu_nb.fit(gpu_X, gpu_y)
    ref_nb.fit(host_X, host_y)

    gpu_score = gpu_nb.score(gpu_X, gpu_y)
    ref_score = ref_nb.score(host_X, host_y)

    # Accept the cuML score anywhere in a symmetric band around the reference.
    tolerance = 1e-4
    lower_bound = ref_score - tolerance
    upper_bound = ref_score + tolerance
    assert lower_bound <= gpu_score <= upper_bound