def test_valid_prediction(alpha: Any) -> None:
    """Fit then predict with a prefit estimator and a valid alpha."""
    base_model = LogisticRegression(multi_class="multinomial")
    base_model.fit(X_toy, y_toy)
    clf = MapieClassifier(estimator=base_model, cv="prefit")
    clf.fit(X_toy, y_toy)
    clf.predict(X_toy, alpha=alpha)
def test_invalid_include_last_label(include_last_label: Any) -> None:
    """An invalid include_last_label value must raise a ValueError."""
    clf = MapieClassifier()
    clf.fit(X_toy, y_toy)
    error_pattern = r".*Invalid include_last_label argument.*"
    with pytest.raises(ValueError, match=error_pattern):
        clf.predict(X_toy, y_toy, include_last_label=include_last_label)
def test_valid_cv(cv: Any) -> None:
    """Valid cv arguments must raise no errors."""
    base_model = LogisticRegression(multi_class="multinomial")
    base_model.fit(X_toy, y_toy)
    clf = MapieClassifier(estimator=base_model, cv=cv)
    clf.fit(X_toy, y_toy)
    clf.predict(X_toy, alpha=0.5)
def test_method_error_in_predict(method: Any, alpha: float) -> None:
    """Hitting the else branch for ``method`` in .predict raises ValueError."""
    clf = MapieClassifier(method="score")
    clf.fit(X_toy, y_toy)
    # Corrupt the attribute after fitting so the check happens at predict time.
    clf.method = method
    with pytest.raises(ValueError, match=r".*Invalid method.*"):
        clf.predict(X_toy, alpha=alpha)
def test_results_for_alpha_as_float_and_arraylike(
    strategy: str, alpha: Any
) -> None:
    """Output values must not depend on alpha being scalar or array-like."""
    init_kwargs, predict_kwargs = STRATEGIES[strategy]
    clf = MapieClassifier(**init_kwargs)
    clf.fit(X, y)
    shared_kwargs = dict(
        include_last_label=predict_kwargs["include_last_label"],
        agg_scores=predict_kwargs["agg_scores"],
    )
    # Predict once per scalar alpha and once with both alphas at once.
    outputs = [
        clf.predict(X, alpha=a, **shared_kwargs)
        for a in (alpha[0], alpha[1], alpha)
    ]
    (y_pred_f1, y_ps_f1), (y_pred_f2, y_ps_f2), (y_pred_arr, y_ps_arr) = outputs
    np.testing.assert_allclose(y_pred_f1, y_pred_arr)
    np.testing.assert_allclose(y_pred_f2, y_pred_arr)
    np.testing.assert_allclose(y_ps_f1[:, :, 0], y_ps_arr[:, :, 0])
    np.testing.assert_allclose(y_ps_f2[:, :, 0], y_ps_arr[:, :, 1])
def test_too_large_cv(cv: Any) -> None:
    """A cv larger than the sample size must raise the sklearn error."""
    clf = MapieClassifier(cv=cv)
    expected_msg = rf".*Cannot have number of splits n_splits={cv} greater.*"
    with pytest.raises(ValueError, match=expected_msg):
        clf.fit(X_toy, y_toy)
def test_sum_proba_to_one_fit(y_pred_proba: NDArray) -> None:
    """
    Fitting with a model whose predicted probabilities do not sum to
    one must raise an AssertionError.
    """
    bad_model = WrongOutputModel(y_pred_proba)
    clf = MapieClassifier(bad_model, cv="prefit")
    with pytest.raises(
        AssertionError, match=r".*The sum of the scores is not equal to one.*"
    ):
        clf.fit(X_toy, y_toy)
def test_include_label_error_in_predict(
    monkeypatch: Any, include_labels: Union[bool, str], alpha: float
) -> None:
    """Hitting the else branch for include_last_label in .predict raises."""
    # Disable the argument check so the invalid value reaches the else branch.
    monkeypatch.setattr(MapieClassifier, "_check_include_last_label", do_nothing)
    clf = MapieClassifier(method="cumulated_score")
    clf.fit(X_toy, y_toy)
    with pytest.raises(ValueError, match=r".*Invalid include.*"):
        clf.predict(X_toy, alpha=alpha, include_last_label=include_labels)
def test_sum_proba_to_one_predict(
    y_pred_proba: NDArray, alpha: Union[float, Iterable[float]]
) -> None:
    """
    Predicting with a model whose predicted probabilities do not sum
    to one must raise an AssertionError.
    """
    clf = MapieClassifier(cv="prefit")
    clf.fit(X_toy, y_toy)
    # Swap in the broken model only after fitting succeeded.
    clf.single_estimator_ = WrongOutputModel(y_pred_proba)
    with pytest.raises(
        AssertionError, match=r".*The sum of the scores is not equal to one.*"
    ):
        clf.predict(X_toy, alpha=alpha)
def test_classifier_without_classes_attribute(
    estimator: ClassifierMixin
) -> None:
    """
    A prefit classifier lacking the 'classes_' attribute must raise an
    AttributeError at fit time.
    """
    estimator.fit(X_toy, y_toy)
    # For pipelines, the attribute lives on the final step.
    target = estimator[-1] if isinstance(estimator, Pipeline) else estimator
    delattr(target, "classes_")
    mapie = MapieClassifier(estimator=estimator, cv="prefit")
    with pytest.raises(AttributeError, match=r".*does not contain 'classes_'.*"):
        mapie.fit(X_toy, y_toy)
def test_results_for_same_alpha(strategy: str) -> None:
    """
    Two equal alpha values must produce identical prediction sets.
    """
    init_kwargs, predict_kwargs = STRATEGIES[strategy]
    clf = MapieClassifier(**init_kwargs)
    clf.fit(X, y)
    _, y_ps = clf.predict(
        X,
        alpha=[0.1, 0.1],
        include_last_label=predict_kwargs["include_last_label"],
        agg_scores=predict_kwargs["agg_scores"],
    )
    for class_idx in (0, 1):
        np.testing.assert_allclose(y_ps[:, class_idx, 0], y_ps[:, class_idx, 1])
def test_predict_output_shape(
    strategy: str, alpha: Any, dataset: Tuple[NDArray, NDArray]
) -> None:
    """Predictions and prediction sets must have the expected shapes."""
    init_kwargs, predict_kwargs = STRATEGIES[strategy]
    X, y = dataset
    clf = MapieClassifier(**init_kwargs)
    clf.fit(X, y)
    y_pred, y_ps = clf.predict(
        X,
        alpha=alpha,
        include_last_label=predict_kwargs["include_last_label"],
        agg_scores=predict_kwargs["agg_scores"],
    )
    n_alpha = len(alpha) if hasattr(alpha, "__len__") else 1
    assert y_pred.shape == (X.shape[0], )
    assert y_ps.shape == (X.shape[0], len(np.unique(y)), n_alpha)
def test_cumulated_scores() -> None:
    """Check cumulated_score conformity scores, quantile and sets on toy data."""
    alpha = [0.65]
    quantile = [0.750183952461055]
    # Fit the toy classifier on its own calibration data.
    cumclf = CumulatedScoreClassifier()
    cumclf.fit(cumclf.X_calib, cumclf.y_calib)
    clf = MapieClassifier(
        cumclf, method="cumulated_score", cv="prefit", random_state=42
    )
    clf.fit(cumclf.X_calib, cumclf.y_calib)
    np.testing.assert_allclose(clf.conformity_scores_, cumclf.y_calib_scores)
    # Predict and compare quantiles and prediction sets to the references.
    _, y_ps = clf.predict(cumclf.X_test, include_last_label=True, alpha=alpha)
    np.testing.assert_allclose(clf.quantiles_, quantile)
    np.testing.assert_allclose(y_ps[:, :, 0], cumclf.y_pred_sets)
def test_image_cumulated_scores(X: Dict[str, ArrayLike]) -> None:
    """Image inputs must work with the cumulated_score method."""
    alpha = [0.65]
    quantile = [0.750183952461055]
    # Fit the image classifier on its calibration split.
    X_calib = X["X_calib"]
    X_test = X["X_test"]
    cumclf = ImageClassifier(X_calib, X_test)
    cumclf.fit(cumclf.X_calib, cumclf.y_calib)
    mapie = MapieClassifier(
        cumclf, method="cumulated_score", cv="prefit", random_state=42
    )
    mapie.fit(cumclf.X_calib, cumclf.y_calib)
    np.testing.assert_allclose(mapie.conformity_scores_, cumclf.y_calib_scores)
    # Predict and compare quantiles and prediction sets to the references.
    _, y_ps = mapie.predict(cumclf.X_test, include_last_label=True, alpha=alpha)
    np.testing.assert_allclose(mapie.quantiles_, quantile)
    np.testing.assert_allclose(y_ps[:, :, 0], cumclf.y_pred_sets)
def test_pipeline_compatibility(strategy: str) -> None:
    """MAPIE must work on a sklearn pipeline fed with pandas dataframes."""
    X = pd.DataFrame(
        {
            "x_cat": ["A", "A", "B", "A", "A", "B"],
            "x_num": [0, 1, 1, 4, np.nan, 5],
        }
    )
    y = pd.Series([0, 1, 2, 0, 1, 0])
    # Impute missing numerical values and one-hot encode the categories.
    numeric_preprocessor = Pipeline(
        steps=[("imputer", SimpleImputer(strategy="mean"))]
    )
    categorical_preprocessor = Pipeline(
        steps=[("encoding", OneHotEncoder(handle_unknown="ignore"))]
    )
    preprocessor = ColumnTransformer(
        [
            ("cat", categorical_preprocessor, ["x_cat"]),
            ("num", numeric_preprocessor, ["x_num"]),
        ]
    )
    pipe = make_pipeline(preprocessor, LogisticRegression())
    pipe.fit(X, y)
    mapie = MapieClassifier(estimator=pipe, **STRATEGIES[strategy][0])
    mapie.fit(X, y)
    mapie.predict(X)
def test_results_with_constant_sample_weights(strategy: str) -> None:
    """
    Predictions must be identical whether sample weights are None or
    constant, whatever the constant.
    """
    init_kwargs, predict_kwargs = STRATEGIES[strategy]
    lr = LogisticRegression(C=1e-99)
    lr.fit(X_toy, y_toy)
    n_samples = len(X_toy)
    # One MAPIE estimator per weighting scheme, all sharing the same base model.
    weight_options = [
        None,
        np.ones(shape=n_samples),
        np.ones(shape=n_samples) * 5,
    ]
    preds, psets = [], []
    for weights in weight_options:
        clf = MapieClassifier(lr, **init_kwargs)
        clf.fit(X_toy, y_toy, sample_weight=weights)
        y_pred, y_ps = clf.predict(
            X_toy,
            alpha=0.2,
            include_last_label=predict_kwargs["include_last_label"],
            agg_scores=predict_kwargs["agg_scores"],
        )
        preds.append(y_pred)
        psets.append(y_ps)
    np.testing.assert_allclose(preds[0], preds[1])
    np.testing.assert_allclose(preds[0], preds[2])
    np.testing.assert_allclose(psets[0], psets[1])
    np.testing.assert_allclose(psets[0], psets[2])
def test_results_single_and_multi_jobs(strategy: str) -> None:
    """
    Predictions must be identical whatever the number of parallel jobs.
    """
    init_kwargs, predict_kwargs = STRATEGIES[strategy]
    results = []
    for n_jobs in (1, -1):
        clf = MapieClassifier(n_jobs=n_jobs, **init_kwargs)
        clf.fit(X_toy, y_toy)
        results.append(
            clf.predict(
                X_toy,
                alpha=0.2,
                include_last_label=predict_kwargs["include_last_label"],
                agg_scores=predict_kwargs["agg_scores"],
            )
        )
    (y_pred_single, y_ps_single), (y_pred_multi, y_ps_multi) = results
    np.testing.assert_allclose(y_pred_single, y_pred_multi)
    np.testing.assert_allclose(y_ps_single, y_ps_multi)
def test_valid_estimator(strategy: str) -> None:
    """A valid estimator must not be corrupted, whatever the strategy."""
    base_model = LogisticRegression().fit(X_toy, y_toy)
    clf = MapieClassifier(estimator=base_model, **STRATEGIES[strategy][0])
    clf.fit(X_toy, y_toy)
    assert isinstance(clf.single_estimator_, LogisticRegression)
def test_method_error_in_fit(monkeypatch: Any, method: str) -> None:
    """Hitting the else branch for ``method`` in .fit raises ValueError."""
    # Disable parameter validation so the invalid method reaches .fit.
    monkeypatch.setattr(MapieClassifier, "_check_parameters", do_nothing)
    clf = MapieClassifier(method=method)
    with pytest.raises(ValueError, match=r".*Invalid method.*"):
        clf.fit(X_toy, y_toy)
def test_valid_method(method: str) -> None:
    """Valid methods must fit without error and set all fit attributes."""
    clf = MapieClassifier(method=method)
    clf.fit(X_toy, y_toy)
    check_is_fitted(clf, clf.fit_attributes)
def test_agg_scores_argument(agg_scores: str) -> None:
    """Predict must accept every valid 'agg_scores' value."""
    clf = MapieClassifier(cv=3, method="score")
    clf.fit(X_toy, y_toy)
    clf.predict(X_toy, alpha=0.5, agg_scores=agg_scores)
y_train = np.hstack([np.full(n_samples, i) for i in range(n_classes)]) # Create test from (x, y) coordinates xx, yy = np.meshgrid( np.arange(x_min, x_max, step), np.arange(x_min, x_max, step) ) X_test = np.stack([xx.ravel(), yy.ravel()], axis=1) # Apply MapieClassifier on the dataset to get prediction sets clf = GaussianNB().fit(X_train, y_train) y_pred = clf.predict(X_test) y_pred_proba = clf.predict_proba(X_test) y_pred_proba_max = np.max(y_pred_proba, axis=1) mapie = MapieClassifier(estimator=clf, cv="prefit", method="score") mapie.fit(X_train, y_train) y_pred_mapie, y_ps_mapie = mapie.predict(X_test, alpha=alpha) # Plot the results tab10 = plt.cm.get_cmap("Purples", 4) colors = {0: "#1f77b4", 1: "#ff7f0e", 2: "#2ca02c", 3: "#d62728"} y_pred_col = list(map(colors.get, y_pred_mapie)) y_train_col = list(map(colors.get, y_train)) y_train_col = [colors[int(i)] for _, i in enumerate(y_train)] fig, axs = plt.subplots(1, 4, figsize=(20, 4)) axs[0].scatter( X_test[:, 0], X_test[:, 1], color=y_pred_col, marker=".", s=10, alpha=0.4 ) axs[0].scatter( X_train[:, 0], X_train[:, 1],
def test_invalid_agg_scores_argument(agg_scores: str) -> None:
    """An invalid 'agg_scores' value must raise a ValueError."""
    clf = MapieClassifier(cv=3, method="score")
    clf.fit(X_toy, y_toy)
    with pytest.raises(ValueError, match=r".*Invalid 'agg_scores' argument.*"):
        clf.predict(X_toy, alpha=0.5, agg_scores=agg_scores)
disp1.figure_.suptitle("Confusion matrix - Original vs Corrupted datasets") ############################################################################## # 3. Estimating prediction sets with MAPIE # ---------------------------------------- # We now use :class:`mapie.classification.MapieClassifier` to estimate # prediction sets for both datasets using the "cumulated_score" `method` and # for `alpha` values ranging from 0.01 to 0.99. alpha = np.arange(0.01, 1, 0.01) mapie_clf1 = MapieClassifier( clf1, method="cumulated_score", cv="prefit", random_state=42 ) mapie_clf1.fit(X_calib1, y_calib1) y_pred1, y_ps1 = mapie_clf1.predict( X_test1, alpha=alpha, include_last_label="randomized" ) mapie_clf2 = MapieClassifier( clf2, method="cumulated_score", cv="prefit", random_state=42 ) mapie_clf2.fit(X_calib2, y_calib2) y_pred2, y_ps2 = mapie_clf2.predict( X_test2, alpha=alpha, include_last_label="randomized" ) ############################################################################## # We can then estimate the marginal coverage for all alpha values in order # to produce a so-called calibration plot, comparing the target coverage with
# We split our training dataset into 5 folds and use each fold as a # calibration set. Each calibration set is therefore used to estimate the # conformity scores and the given quantiles for the two methods implemented in # :class:`mapie.classification.MapieClassifier`. kf = KFold(n_splits=5, shuffle=True) clfs, mapies, y_preds, y_ps_mapies = {}, {}, {}, {} methods = ["score", "cumulated_score"] alpha = np.arange(0.01, 1, 0.01) for method in methods: clfs_, mapies_, y_preds_, y_ps_mapies_ = {}, {}, {}, {} for fold, (train_index, calib_index) in enumerate(kf.split(X_train)): clf = GaussianNB().fit(X_train[train_index], y_train[train_index]) clfs_[fold] = clf mapie = MapieClassifier(estimator=clf, cv="prefit", method=method) mapie.fit(X_train[calib_index], y_train[calib_index]) mapies_[fold] = mapie y_pred_mapie, y_ps_mapie = mapie.predict( X_test_distrib, alpha=alpha, include_last_label="randomized") y_preds_[fold], y_ps_mapies_[fold] = y_pred_mapie, y_ps_mapie clfs[method], mapies[method], y_preds[method], y_ps_mapies[method] = ( clfs_, mapies_, y_preds_, y_ps_mapies_) ############################################################################## # Let's now plot the distribution of conformity scores for each calibration # set and the estimated quantile for ``alpha`` = 0.1. fig, axs = plt.subplots(1, len(mapies["score"]), figsize=(20, 4)) for i, (key, mapie) in enumerate(mapies["score"].items()): axs[i].set_xlabel("Conformity scores") axs[i].hist(mapie.conformity_scores_)