def test_mnb_sample_weight(): clf = MultinomialNB() clf.fit([[1, 2], [1, 2], [1, 0]], [0, 0, 1], sample_weight=[1, 1, 4]) assert_array_equal(clf.predict([[1, 0]]), [1]) positive_prior = np.exp(clf.intercept_[0]) assert_array_almost_equal([1 - positive_prior, positive_prior], [1 / 3., 2 / 3.])
def test_calibration_prefit(): """Test calibration for prefitted classifiers""" n_samples = 50 X, y = make_classification(n_samples=3 * n_samples, n_features=6, random_state=42) sample_weight = np.random.RandomState(seed=42).uniform(size=y.size) X -= X.min() # MultinomialNB only allows positive X # split train and test X_train, y_train, sw_train = \ X[:n_samples], y[:n_samples], sample_weight[:n_samples] X_calib, y_calib, sw_calib = \ X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \ sample_weight[n_samples:2 * n_samples] X_test, y_test = X[2 * n_samples:], y[2 * n_samples:] # Naive-Bayes clf = MultinomialNB() clf.fit(X_train, y_train, sw_train) prob_pos_clf = clf.predict_proba(X_test)[:, 1] # Naive Bayes with calibration for this_X_calib, this_X_test in [(X_calib, X_test), (sparse.csr_matrix(X_calib), sparse.csr_matrix(X_test))]: for method in ['isotonic', 'sigmoid']: pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit") for sw in [sw_calib, None]: pc_clf.fit(this_X_calib, y_calib, sample_weight=sw) y_prob = pc_clf.predict_proba(this_X_test) y_pred = pc_clf.predict(this_X_test) prob_pos_pc_clf = y_prob[:, 1] assert_array_equal(y_pred, np.array([0, 1])[np.argmax(y_prob, axis=1)]) assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf))
def test_mnnb(kind): # Test Multinomial Naive Bayes classification. # This checks that MultinomialNB implements fit and predict and returns # correct values for a simple toy dataset. if kind == 'dense': X = X2 elif kind == 'sparse': X = scipy.sparse.csr_matrix(X2) # Check the ability to predict the learning set. clf = MultinomialNB() assert_raises(ValueError, clf.fit, -X, y2) y_pred = clf.fit(X, y2).predict(X) assert_array_equal(y_pred, y2) # Verify that np.log(clf.predict_proba(X)) gives the same results as # clf.predict_log_proba(X) y_pred_proba = clf.predict_proba(X) y_pred_log_proba = clf.predict_log_proba(X) assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8) # Check that incremental fitting yields the same results clf2 = MultinomialNB() clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2)) clf2.partial_fit(X[2:5], y2[2:5]) clf2.partial_fit(X[5:], y2[5:]) y_pred2 = clf2.predict(X) assert_array_equal(y_pred2, y2) y_pred_proba2 = clf2.predict_proba(X) y_pred_log_proba2 = clf2.predict_log_proba(X) assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8) assert_array_almost_equal(y_pred_proba2, y_pred_proba) assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba) # Partial fit on the whole data at once should be the same as fit too clf3 = MultinomialNB() clf3.partial_fit(X, y2, classes=np.unique(y2)) y_pred3 = clf3.predict(X) assert_array_equal(y_pred3, y2) y_pred_proba3 = clf3.predict_proba(X) y_pred_log_proba3 = clf3.predict_log_proba(X) assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8) assert_array_almost_equal(y_pred_proba3, y_pred_proba) assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)