예제 #1
0
 def interpret(
     self,
     X_train,
     y_train,
     X_validation,
     y_validation,
     model_file_path,
     learner_name,
     target_name=None,
     class_names=None,
     metric_name=None,
     ml_task=None,
     explain_level=2,
 ):
     """Generate explanation artifacts for this learner.

     Nothing is produced when ``self.algorithm_short_name`` is
     ``"Baseline"``. Otherwise the amount of work depends on
     ``explain_level``:

     * ``> 0`` -- ``PermutationImportance.compute_and_plot`` is run on the
       validation data, using ``n_jobs`` from ``self.params`` (``-1`` when
       the key is absent).
     * ``> 1`` -- ``PlotSHAP.compute`` is additionally run on both the
       training and the validation data.

     ``metric_name`` is forwarded only to the permutation importance step,
     ``class_names`` only to the SHAP step. ``target_name`` is accepted but
     not used by this method.

     Returns:
         None.
     """
     # Baseline learners get no feature importance.
     if self.algorithm_short_name == "Baseline":
         return

     wants_importance = explain_level > 0
     wants_shap = explain_level > 1

     if wants_importance:
         # Only read self.params when importance is actually requested.
         n_jobs = self.params.get("n_jobs", -1)
         PermutationImportance.compute_and_plot(
             self,
             X_validation,
             y_validation,
             model_file_path,
             learner_name,
             metric_name,
             ml_task,
             n_jobs,
         )

     if wants_shap:
         PlotSHAP.compute(
             self,
             X_train,
             y_train,
             X_validation,
             y_validation,
             model_file_path,
             learner_name,
             class_names,
             ml_task,
         )
예제 #2
0
    def test_compute_and_plot(self):
        """Check that compute_and_plot writes the importance CSV.

        A tiny XGBoost classifier is fitted on random binary-labelled data,
        permutation importance is computed into a temporary directory, and
        the test asserts that ``learner_test_importance.csv`` exists there.
        """
        n_rows, n_features = 20, 3
        feature_names = [f"f{i}" for i in range(n_features)]
        X = pd.DataFrame(
            np.random.rand(n_rows, n_features), columns=feature_names
        )
        y = np.random.randint(0, 2, n_rows)

        model = XGBClassifier(n_estimators=1, max_depth=2)
        model.fit(X, y)

        with tempfile.TemporaryDirectory() as tmpdir:
            PermutationImportance.compute_and_plot(
                model,
                X_validation=X,
                y_validation=y,
                model_file_path=tmpdir,
                learner_name="learner_test",
                metric_name=None,
                ml_task="binary_classification",
            )
            importance_csv = os.path.join(tmpdir, "learner_test_importance.csv")
            self.assertTrue(os.path.exists(importance_csv))