def test_get_sample_data_larger_1k(self):
    """Check that PlotSHAP.get_sample returns 1000 rows for a 5763-row input."""
    n_rows, n_cols = 5763, 31
    expected_rows = 1000

    # Features are drawn first, then labels, so the global RNG is consumed
    # in a fixed order.
    features = np.random.uniform(size=(n_rows, n_cols))
    labels = np.random.randint(0, 2, size=(n_rows,))
    X = pd.DataFrame(features)
    y = pd.Series(labels)

    X_, y_ = PlotSHAP.get_sample(X, y)

    # Both returned objects must have the expected number of rows.
    self.assertEqual(X_.shape[0], expected_rows)
    self.assertEqual(y_.shape[0], expected_rows)
def test_get_sample_data_smaller_1k(self):
    """Check that PlotSHAP.get_sample keeps all rows of a 100-row input."""
    n_rows, n_cols = 100, 31

    # Features are drawn first, then labels, so the global RNG is consumed
    # in a fixed order.
    features = np.random.uniform(size=(n_rows, n_cols))
    labels = np.random.randint(0, 2, size=(n_rows,))
    X = pd.DataFrame(features)
    y = pd.Series(labels)

    X_, y_ = PlotSHAP.get_sample(X, y)

    # The row count of both returned objects must equal the input row count.
    self.assertEqual(X_.shape[0], n_rows)
    self.assertEqual(y_.shape[0], n_rows)
def interpret(
    self,
    X_train,
    y_train,
    X_validation,
    y_validation,
    model_file_path,
    learner_name,
    target_name=None,
    class_names=None,
    metric_name=None,
    ml_task=None,
    explain_level=2,
):
    """Run the explanation steps selected by ``explain_level``.

    Nothing is done when ``self.algorithm_short_name`` is ``"Baseline"``.
    Otherwise:

    * ``explain_level > 0`` calls ``PermutationImportance.compute_and_plot``
      with the validation data, ``metric_name``, ``ml_task`` and the
      ``"n_jobs"`` entry of ``self.params`` (``-1`` when the key is absent).
    * ``explain_level > 1`` additionally calls ``PlotSHAP.compute`` with both
      the training and validation data, ``class_names`` and ``ml_task``.

    ``target_name`` is accepted but not used by this method. The method
    always returns ``None``.
    """
    # Baseline learners get no feature-importance output.
    if self.algorithm_short_name == "Baseline":
        return

    if explain_level > 0:
        n_jobs = self.params.get("n_jobs", -1)
        PermutationImportance.compute_and_plot(
            self,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            metric_name,
            ml_task,
            n_jobs,
        )

    if explain_level > 1:
        PlotSHAP.compute(
            self,
            X_train,
            y_train,
            X_validation,
            y_validation,
            model_file_path,
            learner_name,
            class_names,
            ml_task,
        )