Example #1
0
    def test_get_sample_data_larger_1k(self):
        """Check that sampling a dataset with more than 1000 rows yields 1000 rows."""
        n_rows, n_cols = 5763, 31
        features = pd.DataFrame(np.random.uniform(size=(n_rows, n_cols)))
        target = pd.Series(np.random.randint(0, 2, size=(n_rows,)))

        sampled_features, sampled_target = PlotSHAP.get_sample(features, target)

        # Features and target are both expected to come back with exactly 1000 rows.
        for sampled in (sampled_features, sampled_target):
            self.assertEqual(sampled.shape[0], 1000)
Example #2
0
    def test_get_sample_data_smaller_1k(self):
        """Check that sampling a 100-row dataset returns all 100 rows."""
        n_rows, n_cols = 100, 31
        features = pd.DataFrame(np.random.uniform(size=(n_rows, n_cols)))
        target = pd.Series(np.random.randint(0, 2, size=(n_rows,)))

        sampled_features, sampled_target = PlotSHAP.get_sample(features, target)

        # Row counts of both outputs are expected to match the input row count.
        for sampled in (sampled_features, sampled_target):
            self.assertEqual(sampled.shape[0], n_rows)
Example #3
0
 def interpret(
     self,
     X_train,
     y_train,
     X_validation,
     y_validation,
     model_file_path,
     learner_name,
     target_name=None,
     class_names=None,
     metric_name=None,
     ml_task=None,
     explain_level=2,
 ):
     """Run the explanation steps selected by ``explain_level``.

     Nothing is done when ``self.algorithm_short_name`` is ``"Baseline"``.
     Otherwise:

     * ``explain_level > 0`` calls ``PermutationImportance.compute_and_plot``
       with the validation data, ``metric_name``, ``ml_task`` and the
       ``"n_jobs"`` entry of ``self.params`` (``-1`` when the key is absent).
     * ``explain_level > 1`` additionally calls ``PlotSHAP.compute`` with
       the train and validation data, ``class_names`` and ``ml_task``.

     ``target_name`` is accepted but not used in this method.
     The method always returns ``None``.
     """
     # Feature importance is deliberately not produced for the Baseline algorithm.
     if self.algorithm_short_name == "Baseline":
         return

     if explain_level > 0:
         permutation_args = (
             self,
             X_validation,
             y_validation,
             model_file_path,
             learner_name,
             metric_name,
             ml_task,
             self.params.get("n_jobs", -1),
         )
         PermutationImportance.compute_and_plot(*permutation_args)

     if explain_level > 1:
         shap_args = (
             self,
             X_train,
             y_train,
             X_validation,
             y_validation,
             model_file_path,
             learner_name,
             class_names,
             ml_task,
         )
         PlotSHAP.compute(*shap_args)