def test_generate_local_sample(self):
    """Check the size and spread of a generated local sample.

    The sample built around the first training row must contain
    ``explainer.N`` rows, and the standard deviation of its 'MSSubClass'
    column must agree with the training frame's at ``places=0``.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
    )
    sample = lime_explainer._generate_local_sample(first_row)
    expected_rows = lime_explainer.N

    sample_std = sample.loc[:, 'MSSubClass'].std()
    training_std = self.frame.loc[:, 'MSSubClass'].std()
    self.assertAlmostEqual(sample_std, training_std, places=0)
    self.assertEqual(sample.shape[0], expected_rows)

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_explain_w_o_discretize(self):
    """Check that the LIME prediction decomposes into its parts.

    After ``explain`` is run on row 0 with ``discretize=None``, the
    explainer's ``lime_pred`` must (almost) equal the surrogate model's
    'Intercept' coefficient plus the sum of the 'Local Contribution'
    column of ``reason_code_values``.
    """
    target_row_id = 0
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
        discretize=None,
    )
    lime_explainer.explain(target_row_id)

    predicted = lime_explainer.lime_pred
    intercept = lime_explainer.lime.coef()['Intercept']
    contribution_total = \
        lime_explainer.reason_code_values['Local Contribution'].sum()
    self.assertAlmostEqual(predicted, intercept + contribution_total)

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_local_contrib_w_discretize(self):
    """Check the shape of the reason codes when discretization is on.

    Runs the individual pipeline steps (sample, score, weight,
    discretize, regress) for the first training row and asserts that
    ``reason_code_values`` has shape (21, 2).
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
        discretize=self.discretize,
    )

    # Build the local data set step by step.
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)
    binned = lime_explainer._discretize_numeric(weighted)

    # Re-express the row of interest with the same bins as the sample:
    # discretized inputs are cut with the explainer's bins, every other
    # input is copied over unchanged.
    binned_row = pd.DataFrame(columns=self.X)
    for col in self.discretize:
        col_bins = lime_explainer.bins_dict[col]
        binned_row[col] = pd.cut(pd.Series(first_row[col]), bins=col_bins)
    untouched_cols = list(set(self.X) - set(self.discretize))
    binned_row[untouched_cols] = first_row[untouched_cols].values

    row_h2o = h2o.H2OFrame(binned_row)
    lime_explainer.lime = lime_explainer._regress(binned, row_h2o)

    self.assertEqual(lime_explainer.reason_code_values.shape, (21, 2))

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_score_local_sample(self):
    """Check that scoring appends a single trailing 'predict' column.

    The scored sample must have exactly one more column than the raw
    local sample, and its last column must be named 'predict'.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
    )
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])

    scored_width = scored.shape[1]
    self.assertEqual(scored_width, sample.shape[1] + 1)
    last_column = scored.columns[-1]
    self.assertEqual(last_column, 'predict')

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_calculate_distance_weights(self):
    """Check that weighting appends a trailing 'distance' column.

    After scoring and weighting, the frame must have two more columns
    than the raw local sample, and its last column must be named
    'distance'.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
    )
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)

    weighted_width = weighted.shape[1]
    self.assertEqual(weighted_width, sample.shape[1] + 2)
    last_column = weighted.columns[-1]
    self.assertEqual(last_column, 'distance')

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_regress_w_discretize(self):
    """Check the surrogate regression when discretization is on.

    Runs the individual pipeline steps for the first training row, fits
    the local model with ``_regress`` and asserts that discretization
    is enabled, that a model was returned, and that ``lime_r2`` matches
    the pinned reference value.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
        discretize=self.discretize,
    )

    # Build the local data set step by step.
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)
    binned = lime_explainer._discretize_numeric(weighted)

    # Re-express the row of interest with the same bins as the sample:
    # discretized inputs are cut with the explainer's bins, every other
    # input is copied over unchanged.
    binned_row = pd.DataFrame(columns=self.X)
    for col in self.discretize:
        col_bins = lime_explainer.bins_dict[col]
        binned_row[col] = pd.cut(pd.Series(first_row[col]), bins=col_bins)
    untouched_cols = list(set(self.X) - set(self.discretize))
    binned_row[untouched_cols] = first_row[untouched_cols].values

    row_h2o = h2o.H2OFrame(binned_row)
    surrogate = lime_explainer._regress(binned, row_h2o)

    self.assertTrue(lime_explainer.discretize)
    self.assertIsNotNone(surrogate)
    # Pinned reference R^2 for this fixture (default 7-place comparison).
    self.assertAlmostEqual(0.9859272974096093, lime_explainer.lime_r2)

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_local_contrib_w_o_discretize(self):
    """Check the shape of the reason codes without discretization.

    Runs the sample, score, weight and regress steps for the first
    training row and asserts that ``reason_code_values`` has shape
    (26, 2).
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
    )
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)

    # The row of interest is handed to the regression as a one-row frame.
    single_row = pd.DataFrame(first_row).T
    row_h2o = h2o.H2OFrame(single_row)
    lime_explainer.lime = lime_explainer._regress(weighted, row_h2o)

    self.assertEqual(lime_explainer.reason_code_values.shape, (26, 2))

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_regress_w_o_discretize(self):
    """Check the surrogate regression without discretization.

    Runs the sample, score and weight steps for the first training row,
    fits the local model with ``_regress`` and asserts that
    discretization is off, that a model was returned, and that
    ``lime_r2`` matches the pinned reference value.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
    )
    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)

    # The row of interest is handed to the regression as a one-row frame.
    single_row = pd.DataFrame(first_row).T
    row_h2o = h2o.H2OFrame(single_row)
    surrogate = lime_explainer._regress(weighted, row_h2o)

    self.assertFalse(lime_explainer.discretize)
    self.assertIsNotNone(surrogate)
    # Pinned reference R^2 for this fixture (default 7-place comparison).
    self.assertAlmostEqual(0.9649889709835218, lime_explainer.lime_r2)

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer
def test_discretize_numeric(self):
    """Check the shape and dtype produced by ``_discretize_numeric``.

    The discretized frame must have ``explainer.N`` rows and two more
    columns than the raw local sample, and the first unique dtype among
    the discretized columns must compare equal to 'category'.
    """
    first_row = self.frame.iloc[0, :]
    lime_explainer = LIMEExplainer(
        training_frame=self.frame,
        X=self.X,
        model=self.model,
        discretize=self.discretize,
    )
    expected_rows = lime_explainer.N

    sample = lime_explainer._generate_local_sample(first_row)
    scored = lime_explainer._score_local_sample(
        sample, first_row[sample.columns])
    weighted = lime_explainer._calculate_distance_weights(0, scored)
    binned = lime_explainer._discretize_numeric(weighted)

    self.assertEqual(binned.shape, (expected_rows, sample.shape[1] + 2))

    # Only the first unique dtype of the discretized columns is inspected.
    discretized_dtypes = binned[self.discretize].dtypes.unique()
    self.assertEqual(discretized_dtypes[0], 'category')

    # Drop the explainer reference explicitly at the end of the test.
    del lime_explainer