コード例 #1
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_categorical_parameters(class_prior, alpha, fit_prior, is_sparse,
                                nlp_20news):
    """Compare CategoricalNB with skCNB for explicit prior/smoothing settings.

    Both estimators are built with the same ``class_prior``, ``alpha`` and
    ``fit_prior`` and fit on the first 2000 rows and 500 columns of the
    ``nlp_20news`` fixture. Their log-probabilities must agree within
    ``rtol=1e-4`` and their predicted labels must be identical.
    """
    n_samples, n_features = 2000, 500

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, cp.float32).tocsr()[:n_samples, :n_features]
    if not is_sparse:
        X = X.todense()
    y = y.astype(cp.int32)[:n_samples]

    # Expand the symbolic prior names into explicit 20-element vectors; any
    # other value is handed to both estimators unchanged.
    if class_prior == 'balanced':
        class_prior = np.array([1 / 20] * 20)
    elif class_prior == 'unbalanced':
        class_prior = np.linspace(0.01, 0.09, 20)

    nb_kwargs = dict(class_prior=class_prior, alpha=alpha, fit_prior=fit_prior)
    cu_model = CategoricalNB(**nb_kwargs)
    sk_model = skCNB(**nb_kwargs)

    cu_model.fit(X, y)
    cu_labels = cu_model.predict(X).get()
    cu_log_proba = cu_model.predict_log_proba(X).get()

    # The reference model is always fed a dense array obtained via ``.get()``.
    if is_sparse:
        X_ref = X.todense().get()
    else:
        X_ref = X.get()
    sk_model.fit(X_ref, y.get())
    sk_labels = sk_model.predict(X_ref)
    sk_log_proba = sk_model.predict_log_proba(X_ref)

    assert_allclose(cu_log_proba, sk_log_proba, rtol=1e-4)
    assert_array_equal(cu_labels, sk_labels)
コード例 #2
0
ファイル: test_naive_bayes.py プロジェクト: st071300/cuML
def test_basic_fit_predict_sparse(x_dtype, y_dtype, nlp_20news):
    """Fit MultinomialNB on the ``nlp_20news`` data and check its accuracy.

    The model is fit once as a warm-up, then a fresh model is fit inside a
    profiling time range. Predictions on the training data must reach an
    accuracy of at least 0.924.
    """
    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype)
    y = y.astype(y_dtype)

    # Priming it seems to lower the end-to-end runtime
    warmup_model = MultinomialNB()
    warmup_model.fit(X, y)

    cp.cuda.Stream.null.synchronize()

    # NOTE(review): ``cupy.prof`` was superseded by ``cupyx.profiler`` in
    # newer CuPy releases -- confirm against the CuPy version in use.
    with cp.prof.time_range(message="start", color_id=10):
        model = MultinomialNB()
        model.fit(X, y)

    predicted = cp.asnumpy(model.predict(X))
    expected = cp.asnumpy(y)

    assert accuracy_score(expected, predicted) >= 0.924
コード例 #3
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_categorical(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Check default-constructed CategoricalNB against skCNB.

    Uses the first 2000 rows and 500 columns of ``nlp_20news``. Class counts
    must match exactly, class log-priors within ``1e-6``, log-probabilities
    within ``atol=rtol=1e-2`` and scores within ``1e-3``. The sparse/int32
    combination is skipped.
    """
    if is_sparse and x_dtype == cp.int32:
        pytest.skip("Sparse matrices with integers dtype are not supported")

    n_rows, n_cols = 2000, 500
    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, dtype=cp.float32).tocsr()[:n_rows, :n_cols]
    y = y.astype(y_dtype)[:n_rows]

    if not is_sparse:
        X = X.todense()
    X = X.astype(x_dtype)

    cuml_model = CategoricalNB()
    cuml_model.fit(X, y)
    cuml_score = cuml_model.score(X, y)
    cuml_proba = cuml_model.predict_log_proba(X).get()

    # The reference model is always fed dense data obtained via ``.get()``.
    X_ref = X.todense().get() if is_sparse else X.get()
    y_ref = y.get()
    sk_model = skCNB()
    sk_model.fit(X_ref, y_ref)
    sk_score = sk_model.score(X_ref, y_ref)
    sk_proba = sk_model.predict_log_proba(X_ref)

    score_tol = 1e-3

    assert_array_equal(sk_model.class_count_, cuml_model.class_count_.get())
    assert_allclose(sk_model.class_log_prior_,
                    cuml_model.class_log_prior_.get(), 1e-6)
    assert_allclose(cuml_proba, sk_proba, atol=1e-2, rtol=1e-2)
    assert sk_score - score_tol <= cuml_score <= sk_score + score_tol
コード例 #4
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_gaussian_parameters(priors, var_smoothing, nlp_20news):
    """Compare GaussianNB with skGNB for explicit priors and smoothing.

    Both estimators are fit on the first 150 rows of ``nlp_20news``
    (densified). Their ``epsilon_`` values must agree within ``rtol=1e-4``
    and their predicted labels must be identical.
    """
    n_rows = 150

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X[:n_rows], cp.float32).todense()
    y = y.astype(cp.int32)[:n_rows]

    # Expand the symbolic prior names into explicit 20-element vectors; any
    # other value (including None) is used as given.
    if priors == 'balanced':
        priors = cp.array([1 / 20] * 20)
    elif priors == 'unbalanced':
        priors = cp.linspace(0.01, 0.09, 20)

    # The reference estimator gets the ``.get()`` form of the priors.
    sk_priors = None if priors is None else priors.get()

    model = GaussianNB(priors=priors, var_smoothing=var_smoothing)
    model_sk = skGNB(priors=sk_priors, var_smoothing=var_smoothing)

    # Convert once and reuse for both fit and predict on the reference model.
    X_ref = X.get()
    model.fit(X, y)
    model_sk.fit(X_ref, y.get())

    y_hat = cp.asnumpy(model.predict(X))
    y_hat_sk = model_sk.predict(X_ref)

    assert_allclose(model.epsilon_.get(), model_sk.epsilon_, rtol=1e-4)
    assert_array_equal(y_hat, y_hat_sk)
コード例 #5
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_gaussian_fit_predict(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Fit GaussianNB on a slice of ``nlp_20news`` and check its accuracy.

    Uses at most 500 rows and 200000 columns. In the sparse case the model is
    fit on the CSR slice directly; otherwise it is fit on a contiguous NumPy
    copy of the densified data. Predictions on the training data must reach
    an accuracy of at least 0.99.
    """
    n_rows, n_cols = 500, int(2e5)

    X, y = nlp_20news
    model = GaussianNB()
    X = sparse_scipy_to_cp(X, x_dtype).tocsr()[:n_rows, :n_cols]

    if not is_sparse:
        X = X.todense()
        y = y[:n_rows].astype(y_dtype)
        # Fit from a C-contiguous NumPy array while prediction below still
        # uses the densified ``X``.
        dense_np = np.ascontiguousarray(cp.asnumpy(X).astype(x_dtype))
        model.fit(dense_np, y)
    else:
        y = y.astype(y_dtype)[:n_rows]
        model.fit(X, y)

    predicted = cp.asnumpy(model.predict(X))
    expected = cp.asnumpy(y)

    assert accuracy_score(expected, predicted) >= 0.99
コード例 #6
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_bernoulli(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Check default-constructed BernoulliNB against skBNB.

    Uses the first 500 rows of ``nlp_20news``. Class counts must match
    exactly, class log-priors within ``1e-6``, log-probabilities within
    ``atol=rtol=1e-2`` and scores within ``1e-3``.
    """
    n_rows = 500
    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype).tocsr()[:n_rows]
    y = y.astype(y_dtype)[:n_rows]
    if not is_sparse:
        X = X.todense()

    # Inputs for the reference model, obtained via ``.get()``.
    X_ref, y_ref = X.get(), y.get()

    sk_model = skBNB()
    cuml_model = BernoulliNB()

    sk_model.fit(X_ref, y_ref)
    cuml_model.fit(X, y)

    sk_score = sk_model.score(X_ref, y_ref)
    cuml_score = cuml_model.score(X, y)
    cuml_proba = cuml_model.predict_log_proba(X).get()
    sk_proba = sk_model.predict_log_proba(X_ref)

    score_tol = 1e-3

    assert_array_equal(sk_model.class_count_, cuml_model.class_count_.get())
    assert_allclose(sk_model.class_log_prior_,
                    cuml_model.class_log_prior_.get(), 1e-6)
    assert_allclose(cuml_proba, sk_proba, atol=1e-2, rtol=1e-2)
    assert sk_score - score_tol <= cuml_score <= sk_score + score_tol
コード例 #7
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_gaussian_partial_fit(nlp_20news):
    """Train GaussianNB incrementally with ``partial_fit`` and check accuracy.

    The first 1500 rows of ``nlp_20news`` are fed to the model in chunks of
    250 rows. Predictions on the same rows must reach an accuracy of at least
    0.99. The test then checks that ``partial_fit`` raises ``ValueError`` when
    ``classes`` does not match the labels, and when ``classes`` is omitted on
    the first call.
    """
    chunk_size = 250
    n_rows = 1500
    x_dtype, y_dtype = cp.float32, cp.int32

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).tocsr()[:n_rows]
    y = y.astype(y_dtype)[:n_rows]

    model = GaussianNB()

    classes = np.unique(y)

    # Walk the rows in fixed-size chunks; the last chunk is clamped to the
    # number of available rows so a short trailing chunk is still consumed.
    n_samples = X.shape[0]
    for start in range(0, n_samples, chunk_size):
        stop = min(start + chunk_size, n_samples)
        model.partial_fit(X[start:stop], y[start:stop], classes=classes)

    y_hat = cp.asnumpy(model.predict(X))
    y = cp.asnumpy(y)
    assert accuracy_score(y, y_hat) >= 0.99

    # Test whether label mismatch between target y and classes raises an Error
    assert_raises(ValueError,
                  GaussianNB().partial_fit,
                  X,
                  y,
                  classes=cp.array([0, 1]))
    # Raise because classes is required on first call of partial_fit
    assert_raises(ValueError, GaussianNB().partial_fit, X, y)
コード例 #8
0
ファイル: test_allocator.py プロジェクト: rapidsai/cuml
def test_naive_bayes(nlp_20news):
    """Run MultinomialNB fit and inference under ``dummy_allocator``.

    No assertions are made on the results; the test only exercises ``fit``,
    ``predict`` (twice), ``predict_proba``, ``predict_log_proba`` and
    ``score`` while the allocator context is active.
    """
    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, cp.float32).astype(cp.float32)
    y = y.astype(cp.int32)

    with cupy_using_allocator(dummy_allocator):
        model = MultinomialNB()
        model.fit(X, y)

        # Each call's result replaces the previous one, so earlier outputs
        # are released as soon as the next one is produced.
        inference_calls = (
            model.predict,
            model.predict,
            model.predict_proba,
            model.predict_log_proba,
        )
        for infer in inference_calls:
            result = infer(X)
        result = model.score(X, y)

        del result
コード例 #9
0
ファイル: test_naive_bayes.py プロジェクト: st071300/cuML
def test_partial_fit(x_dtype, y_dtype, nlp_20news):
    """Train MultinomialNB incrementally with ``partial_fit`` and check accuracy.

    The whole ``nlp_20news`` data set is fed to the model in chunks of 500
    rows. Predictions on the same data must reach an accuracy of at least
    0.924.
    """
    chunk_size = 500

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype).tocsr()
    y = y.astype(y_dtype)

    model = MultinomialNB()

    classes = np.unique(y)

    # Walk the rows in fixed-size chunks; the last chunk is clamped to the
    # number of available rows so a short trailing chunk is still consumed.
    n_samples = X.shape[0]
    for start in range(0, n_samples, chunk_size):
        stop = min(start + chunk_size, n_samples)
        model.partial_fit(X[start:stop], y[start:stop], classes=classes)

    y_hat = cp.asnumpy(model.predict(X))
    y = cp.asnumpy(y)

    assert accuracy_score(y, y_hat) >= 0.924
コード例 #10
0
ファイル: test_naive_bayes.py プロジェクト: st071300/cuML
def test_basic_fit_predict_dense_numpy(x_dtype, y_dtype, nlp_20news):
    """Fit MultinomialNB from a dense NumPy array and check its accuracy.

    The first 500 rows of ``nlp_20news`` are densified; the model is fit on a
    C-contiguous NumPy copy cast to ``x_dtype`` and then predicts on the
    densified ``X``. Accuracy on those rows must be at least 0.911.
    """
    n_rows = 500

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, cp.float32).tocsr()[0:n_rows].todense()
    y = y.astype(y_dtype)[:n_rows]

    model = MultinomialNB()
    model.fit(np.ascontiguousarray(cp.asnumpy(X).astype(x_dtype)), y)

    y_hat = cp.asnumpy(model.predict(X))
    y = cp.asnumpy(y)

    # The comparison must be asserted; as a bare expression it was evaluated
    # and discarded, so the test could never fail on accuracy.
    assert accuracy_score(y, y_hat) >= 0.911
コード例 #11
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_multinomial_basic_fit_predict_dense_numpy(x_dtype, y_dtype,
                                                   nlp_20news):
    """Compare MultinomialNB predictions with skNB on 500 rows.

    The cuML model is fit on a C-contiguous dense NumPy copy of the data cast
    to ``x_dtype`` and predicts on the CSR slice; the reference model is fit
    and evaluated on the ``.get()`` form of the same CSR slice. The two sets
    of predicted labels must agree.
    """
    n_rows = 500

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, cp.float32).tocsr()[:n_rows]
    y = y[:n_rows].astype(y_dtype)

    model = MultinomialNB()
    model.fit(np.ascontiguousarray(cp.asnumpy(X.todense()).astype(x_dtype)), y)

    y_hat = model.predict(X).get()

    X_ref = X.get()
    modelsk = skNB()
    modelsk.fit(X_ref, y.get())
    # Predict with the reference model: predicting with ``model`` here would
    # compare the cuML model against itself and leave ``modelsk`` unused.
    y_sk = modelsk.predict(X_ref)

    assert_allclose(y_hat, y_sk)
コード例 #12
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_categorical_partial_fit(x_dtype, y_dtype, is_sparse, nlp_20news):
    """Train CategoricalNB incrementally and check its score.

    The first 5000 rows of ``nlp_20news`` are fed to ``partial_fit`` in
    chunks of 1000 rows. The sparse variant keeps every column; the dense
    variant keeps the first 500. The final score must match a hard-coded
    expected value (one per variant) to within ``1e-4``. The sparse/int32
    combination is skipped.
    """
    if is_sparse and x_dtype == cp.int32:
        pytest.skip("Sparse matrices with integers dtype are not supported")

    n_rows, n_cols = 5000, 500
    chunk_size = 1000

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, 'float32').tocsr()[:n_rows]
    if not is_sparse:
        X = X[:, :n_cols].todense().astype(x_dtype)
        expected_score = 0.1040
    else:
        # Only the stored values are cast; the matrix stays sparse.
        X.data = X.data.astype(x_dtype)
        expected_score = 0.5414
    y = y.astype(y_dtype)[:n_rows]

    model = CategoricalNB()

    classes = np.unique(y)

    # Walk the rows chunk by chunk, clamping the last chunk to the row count.
    n_samples = X.shape[0]
    for start in range(0, n_samples, chunk_size):
        stop = min(start + chunk_size, n_samples)
        model.partial_fit(X[start:stop], y[start:stop], classes=classes)

    cuml_score = model.score(X, y)
    score_tol = 1e-4
    assert expected_score - score_tol <= cuml_score <= expected_score + score_tol
コード例 #13
0
ファイル: test_naive_bayes.py プロジェクト: st071300/cuML
def test_predict_log_proba(x_dtype, y_dtype, nlp_20news):
    """Compare MultinomialNB log-probabilities with skNB.

    The cuML model is fit on the converted CSR data, the reference model on
    the fixture's original ``X`` and the ``.get()`` form of ``y``. The
    log-probabilities must agree within ``atol=rtol=1e-2``.
    """
    X, y = nlp_20news

    cu_X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype).tocsr()
    cu_y = y.astype(y_dtype)

    sk_y = y.get()

    cuml_model = MultinomialNB()
    sk_model = skNB()

    cuml_model.fit(cu_X, cu_y)
    sk_model.fit(X, sk_y)

    cuml_proba = cuml_model.predict_log_proba(cu_X).get()
    sk_proba = sk_model.predict_log_proba(X)

    assert_allclose(cuml_proba, sk_proba, atol=1e-2, rtol=1e-2)
コード例 #14
0
ファイル: test_naive_bayes.py プロジェクト: daxiongshu/cuml
def test_bernoulli_partial_fit(x_dtype, y_dtype, nlp_20news):
    """Train BernoulliNB and skBNB incrementally and compare predictions.

    The first 1500 rows of ``nlp_20news`` are fed to both models'
    ``partial_fit`` in chunks of 500 rows; the labels the two models then
    predict for those rows must agree.
    """
    chunk_size = 500
    n_rows = 1500

    X, y = nlp_20news

    X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype)
    y = y.astype(y_dtype)[:n_rows]

    X = X.tocsr()[:n_rows]

    model = BernoulliNB()
    modelsk = skBNB()

    classes = np.unique(y)

    # Walk the rows chunk by chunk, clamping the last chunk to the row count,
    # and feed each chunk to both models.
    n_samples = X.shape[0]
    for start in range(0, n_samples, chunk_size):
        stop = min(start + chunk_size, n_samples)
        x_chunk = X[start:stop]
        y_chunk = y[start:stop]

        model.partial_fit(x_chunk, y_chunk, classes=classes)
        modelsk.partial_fit(x_chunk.get(), y_chunk.get(),
                            classes=classes.get())

    y_hat = model.predict(X).get()
    y_sk = modelsk.predict(X.get())

    assert_allclose(y_hat, y_sk)
コード例 #15
0
ファイル: test_naive_bayes.py プロジェクト: st071300/cuML
def test_score(x_dtype, y_dtype, nlp_20news):
    """Compare the MultinomialNB score with skNB.

    The cuML model is fit and scored on the converted CSR data, the reference
    model on the fixture's original ``X`` and the ``.get()`` form of ``y``.
    The two scores must agree to within ``1e-4``.
    """
    X, y = nlp_20news

    cu_X = sparse_scipy_to_cp(X, x_dtype).astype(x_dtype).tocsr()
    cu_y = y.astype(y_dtype)

    sk_y = y.get()

    cuml_model = MultinomialNB()
    sk_model = skNB()

    cuml_model.fit(cu_X, cu_y)
    sk_model.fit(X, sk_y)

    cuml_score = cuml_model.score(cu_X, cu_y)
    sk_score = sk_model.score(X, sk_y)

    score_tol = 1e-4

    assert sk_score - score_tol <= cuml_score <= sk_score + score_tol