Ejemplo n.º 1
0
    def test_generate_local_sample(self):
        """Check the size and spread of a generated local sample.

        The sample generated for the first training row must contain
        ``explainer.N`` rows, and its ``MSSubClass`` standard deviation must
        agree with the training frame's to zero decimal places.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame, X=self.X, model=self.model)

        sample = explainer._generate_local_sample(first_row)
        expected_rows = explainer.N

        sample_std = sample.loc[:, 'MSSubClass'].std()
        frame_std = self.frame.loc[:, 'MSSubClass'].std()
        self.assertAlmostEqual(sample_std, frame_std, places=0)
        self.assertEqual(sample.shape[0], expected_rows)
        del explainer
Ejemplo n.º 2
0
    def test_explain_w_o_discretize(self):
        """Check that the LIME prediction decomposes additively.

        With ``discretize=None``, ``lime_pred`` for row id 0 must equal the
        ``'Intercept'`` entry of ``explainer.lime.coef()`` plus the sum of
        the ``'Local Contribution'`` column of ``reason_code_values``.
        """
        explainer = LIMEExplainer(
            training_frame=self.frame,
            X=self.X,
            model=self.model,
            discretize=None,
        )
        explainer.explain(0)

        prediction = explainer.lime_pred
        intercept = explainer.lime.coef()['Intercept']
        contributions = explainer.reason_code_values['Local Contribution']
        self.assertAlmostEqual(prediction, intercept + contributions.sum())
        del explainer
Ejemplo n.º 3
0
    def test_local_contrib_w_discretize(self):
        """Check the reason-code table shape when discretizing.

        Runs the sample, score, weight, discretize and regress steps by hand
        for the first training row and expects ``reason_code_values`` to
        have shape ``(21, 2)`` afterwards.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame,
            X=self.X,
            model=self.model,
            discretize=self.discretize,
        )

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)
        binned = explainer._discretize_numeric(weighted)

        # Build a one-row frame for the explained row: columns listed in
        # self.discretize are cut with the explainer's bins, the remaining
        # columns are copied over as-is.
        binned_row = pd.DataFrame(columns=self.X)
        for col in self.discretize:
            binned_row[col] = pd.cut(pd.Series(first_row[col]),
                                     bins=explainer.bins_dict[col])
        passthrough = list(set(self.X) - set(self.discretize))
        binned_row[passthrough] = first_row[passthrough].values

        explainer.lime = explainer._regress(binned, h2o.H2OFrame(binned_row))

        self.assertEqual(explainer.reason_code_values.shape, (21, 2))
        del explainer
Ejemplo n.º 4
0
    def test_score_local_sample(self):
        """Check that scoring appends a trailing ``'predict'`` column.

        The scored sample must have exactly one more column than the raw
        local sample, and that last column must be named ``'predict'``.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame, X=self.X, model=self.model)

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])

        n_scored_cols = scored.shape[1]
        self.assertEqual(n_scored_cols, sample.shape[1] + 1)
        self.assertEqual(scored.columns[-1], 'predict')
        del explainer
Ejemplo n.º 5
0
    def test_calculate_distance_weights(self):
        """Check that weighting appends a trailing ``'distance'`` column.

        After scoring and weighting, the frame must have two more columns
        than the raw local sample, the last one named ``'distance'``.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame, X=self.X, model=self.model)

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)

        n_weighted_cols = weighted.shape[1]
        self.assertEqual(n_weighted_cols, sample.shape[1] + 2)
        self.assertEqual(weighted.columns[-1], 'distance')
        del explainer
Ejemplo n.º 6
0
    def test_regress_w_discretize(self):
        """Check the regression step when discretization is enabled.

        Runs the sample, score, weight and discretize steps for the first
        training row, regresses on the result, and expects a non-None model,
        a truthy ``explainer.discretize`` and the pinned ``lime_r2`` value.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame,
            X=self.X,
            model=self.model,
            discretize=self.discretize,
        )

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)
        binned = explainer._discretize_numeric(weighted)

        # One-row frame for the explained row: discretized columns are cut
        # with the explainer's bins, all other columns are copied as-is.
        binned_row = pd.DataFrame(columns=self.X)
        for col in self.discretize:
            binned_row[col] = pd.cut(pd.Series(first_row[col]),
                                     bins=explainer.bins_dict[col])
        passthrough = list(set(self.X) - set(self.discretize))
        binned_row[passthrough] = first_row[passthrough].values

        fitted = explainer._regress(binned, h2o.H2OFrame(binned_row))

        self.assertTrue(explainer.discretize)
        self.assertIsNotNone(fitted)
        self.assertAlmostEqual(0.9859272974096093, explainer.lime_r2)
        del explainer
Ejemplo n.º 7
0
    def test_local_contrib_w_o_discretize(self):
        """Check the reason-code table shape without discretization.

        Runs the sample, score, weight and regress steps by hand for the
        first training row and expects ``reason_code_values`` to have shape
        ``(26, 2)`` afterwards.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame, X=self.X, model=self.model)

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)

        # The explained row is passed to the regression as a one-row frame.
        explainer.lime = explainer._regress(
            weighted, h2o.H2OFrame(pd.DataFrame(first_row).T))

        self.assertEqual(explainer.reason_code_values.shape, (26, 2))
        del explainer
Ejemplo n.º 8
0
    def test_regress_w_o_discretize(self):
        """Check the regression step when discretization is not requested.

        Runs the sample, score and weight steps for the first training row,
        regresses on the result, and expects a non-None model, a falsy
        ``explainer.discretize`` and the pinned ``lime_r2`` value.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame, X=self.X, model=self.model)

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)

        # The explained row is passed to the regression as a one-row frame.
        fitted = explainer._regress(
            weighted, h2o.H2OFrame(pd.DataFrame(first_row).T))

        self.assertFalse(explainer.discretize)
        self.assertIsNotNone(fitted)
        self.assertAlmostEqual(0.9649889709835218, explainer.lime_r2)
        del explainer
Ejemplo n.º 9
0
    def test_discretize_numeric(self):
        """Check the shape and dtype produced by ``_discretize_numeric``.

        The discretized frame must keep ``explainer.N`` rows and the local
        sample's column count plus two, and the first unique dtype among
        the ``self.discretize`` columns must compare equal to ``'category'``.
        """
        first_row = self.frame.iloc[0, :]
        explainer = LIMEExplainer(
            training_frame=self.frame,
            X=self.X,
            model=self.model,
            discretize=self.discretize,
        )
        expected_rows = explainer.N

        sample = explainer._generate_local_sample(first_row)
        scored = explainer._score_local_sample(
            sample, first_row[sample.columns])
        weighted = explainer._calculate_distance_weights(0, scored)
        binned = explainer._discretize_numeric(weighted)

        expected_shape = (expected_rows, sample.shape[1] + 2)
        self.assertEqual(binned.shape, expected_shape)

        binned_dtypes = binned[self.discretize].dtypes.unique()
        self.assertEqual(binned_dtypes[0], 'category')
        del explainer