def interpret(
    self,
    X_train,
    y_train,
    X_validation,
    y_validation,
    model_file_path,
    learner_name,
    target_name=None,
    class_names=None,
    metric_name=None,
    ml_task=None,
    explain_level=2,
):
    """Generate explanation artifacts for this model.

    Nothing is produced when ``self.algorithm_short_name`` is ``"Baseline"``.
    Otherwise the amount of work is driven by ``explain_level``:

    * ``explain_level > 0`` -- ``PermutationImportance.compute_and_plot`` is
      called on the validation data, with ``n_jobs`` read from
      ``self.params`` (falling back to ``-1`` when the key is absent).
    * ``explain_level > 1`` -- ``PlotSHAP.compute`` is additionally called
      with both the training and the validation data.

    ``target_name`` is accepted but not used inside this method.

    Returns:
        None in every case.
    """
    # Do not produce feature importance for Baseline.
    is_baseline = self.algorithm_short_name == "Baseline"
    if is_baseline:
        return

    if explain_level > 0:
        PermutationImportance.compute_and_plot(
            self,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            metric_name,
            ml_task,
            self.params.get("n_jobs", -1),
        )

    # SHAP explanations are only requested at the higher explain levels.
    if not (explain_level > 1):
        return

    PlotSHAP.compute(
        self,
        X_train,
        y_train,
        X_validation,
        y_validation,
        model_file_path,
        learner_name,
        class_names,
        ml_task,
    )
def test_compute_and_plot(self):
    """Check that compute_and_plot leaves an importance CSV behind.

    A one-tree ``XGBClassifier`` is fitted on 20 rows of random data with
    three features (``f0``..``f2``) and random 0/1 labels. The test then
    runs ``PermutationImportance.compute_and_plot`` against a temporary
    directory and asserts that ``learner_test_importance.csv`` exists there.
    """
    n_rows = 20
    n_features = 3
    feature_names = [f"f{i}" for i in range(n_features)]

    # Features are drawn before labels, both from numpy's global RNG.
    X = pd.DataFrame(np.random.rand(n_rows, n_features), columns=feature_names)
    y = np.random.randint(0, 2, n_rows)

    model = XGBClassifier(n_estimators=1, max_depth=2)
    model.fit(X, y)

    with tempfile.TemporaryDirectory() as tmpdir:
        PermutationImportance.compute_and_plot(
            model,
            X_validation=X,
            y_validation=y,
            model_file_path=tmpdir,
            learner_name="learner_test",
            metric_name=None,
            ml_task="binary_classification",
        )
        # The check must happen while the temporary directory still exists.
        expected_csv = os.path.join(tmpdir, "learner_test_importance.csv")
        self.assertTrue(os.path.exists(expected_csv))