def test_gnb_prior(): # Test whether class priors are properly set. clf = GaussianNB().fit(X, y) assert_array_almost_equal(np.array([3, 3]) / 6.0, clf.class_prior_, 8) clf.fit(X1, y1) # Check that the class priors sum to 1 assert_array_almost_equal(clf.class_prior_.sum(), 1)
def test_gnb_pfit_wrong_nb_features(): """Test whether an error is raised when the number of feature changes between two partial fit""" clf = GaussianNB() # Fit for the first time the GNB clf.fit(X, y) # Partial fit a second time with an incoherent X assert_raises(ValueError, clf.partial_fit, np.hstack((X, X)), y)
def test_gnb_priors_sum_isclose(): # test whether the class prior sum is properly tested""" X = np.array([[-1, -1], [-2, -1], [-3, -2], [-4, -5], [-5, -4], [1, 1], [2, 1], [3, 2], [4, 4], [5, 5]]) priors = np.array( [0.08, 0.14, 0.03, 0.16, 0.11, 0.16, 0.07, 0.14, 0.11, 0.0]) Y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) clf = GaussianNB(priors) # smoke test for issue #9633 clf.fit(X, Y)
def test_predict_on_toy_problem(): """Manually check predicted class labels for toy dataset.""" clf1 = LogisticRegression(random_state=123) clf2 = RandomForestClassifier(random_state=123) clf3 = GaussianNB() X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2], [2.1, 1.4], [3.1, 2.3]]) y = np.array([1, 1, 1, 2, 2, 2]) assert all(clf1.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) assert all(clf2.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) assert all(clf3.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard', weights=[1, 1, 1]) assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[1, 1, 1]) assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2])
def test_gnb(): # Gaussian Naive Bayes classification. # This checks that GaussianNB implements fit and predict and returns # correct values for a simple toy dataset. clf = GaussianNB() y_pred = clf.fit(X, y).predict(X) assert_array_equal(y_pred, y) y_pred_proba = clf.predict_proba(X) y_pred_log_proba = clf.predict_log_proba(X) assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8) # Test whether label mismatch between target y and classes raises # an Error # FIXME Remove this test once the more general partial_fit tests are merged assert_raises(ValueError, GaussianNB().partial_fit, X, y, classes=[0, 1])
def test_gnb_prior_large_bias(): """Test if good prediction when class prior favor largely one class""" clf = GaussianNB(priors=np.array([0.01, 0.99])) clf.fit(X, y) assert clf.predict([[-0.1, -0.1]]) == np.array([2])
cluster_std=1.0, centers=centers, shuffle=False, random_state=42) y[:n_samples // 2] = 0 y[n_samples // 2:] = 1 sample_weight = np.random.RandomState(42).rand(y.shape[0]) # split train, test for calibration X_train, X_test, y_train, y_test, sw_train, sw_test = \ train_test_split(X, y, sample_weight, test_size=0.9, random_state=42) # Gaussian Naive-Bayes with no calibration clf = GaussianNB() clf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights prob_pos_clf = clf.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with isotonic calibration clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic') clf_isotonic.fit(X_train, y_train, sw_train) prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with sigmoid calibration clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid') clf_sigmoid.fit(X_train, y_train, sw_train) prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1] print("Brier scores: (the smaller the better)") clf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)