def test_select_neighbours_5(self):
    """Passing the string 'euclidean' as ``dist_fun`` must raise ``ValueError``."""
    query_point = np.array([4, 3, 2])
    # wrong distance function given
    with self.assertRaises(ValueError) as raised:
        select_neighbours(
            self.x,
            query_point,
            n=1,
            dist_fun='euclidean',
        )
def test_select_neighbours_8(self):
    """Compare a call using ``selected_variables=['a', 'd']`` with a plain call.

    The named frame only has the columns 'a', 'b' and 'c', so 'd' is not
    among them; the test expects the same neighbours as when no variable
    selection is requested at all.
    """
    named_frame = pd.DataFrame(self.x, columns=['a', 'b', 'c'])
    with_selection = select_neighbours(
        named_frame,
        [4, 1, 5],
        n=10,
        selected_variables=['a', 'd'],
    )

    unnamed_frame = pd.DataFrame(self.x)
    without_selection = select_neighbours(unnamed_frame, [4, 1, 5], n=10)

    np.testing.assert_array_equal(with_selection, without_selection)
def test_select_neighbours(self):
    """The single nearest neighbour of the first data row is that row itself.

    Checked once with the callable ``euclidean_distances`` and once with
    the distance named 'gower'.
    """
    nearest_by_callable = select_neighbours(
        self.x, self.x[0], dist_fun=euclidean_distances, n=1
    )
    nearest_by_name = select_neighbours(
        self.x, self.x[0], dist_fun='gower', n=1
    )

    # Both lookups run before any assertion, as in the original ordering.
    for found in (nearest_by_callable, nearest_by_name):
        self.assertSequenceEqual(list(found.iloc[0]), list(self.x[0]))
def test_select_neighbours_2(self):
    """Asking for ``n`` neighbours gives a result of shape ``(n, n_columns)``.

    Verified for the callable ``euclidean_distances`` and for 'gower'.
    """
    _, n_columns = self.x.shape
    wanted = 3

    for metric in (euclidean_distances, 'gower'):
        found = select_neighbours(
            self.x, np.array([4, 3, 2]), dist_fun=metric, n=wanted
        )
        self.assertEqual(found.shape, (wanted, n_columns))
def test_select_neighbours_3(self):
    """When ``y`` is passed, the returned rows and labels stay paired.

    The entry ``sample_y[1]`` is located in ``self.y`` (first occurrence)
    and the data row at that index must equal the returned row at
    position 1.
    """
    sample_x, sample_y = select_neighbours(
        self.x, np.array([4, 3, 2]), y=self.y, n=3
    )

    picked_label = sample_y[1]
    source_index = list(self.y).index(picked_label)

    returned_row = list(sample_x.iloc[1])
    original_row = list(self.x[source_index])
    self.assertSequenceEqual(returned_row, original_row)
def test_select_neighbours_7(self):
    """Selecting variables 'a' and 'b' still returns all three columns.

    With ``n=2`` the expected result shape is ``(2, 3)``.
    """
    named_frame = pd.DataFrame(self.x, columns=['a', 'b', 'c'])
    sample_x = select_neighbours(
        named_frame,
        [4, 1, 5],
        n=2,
        selected_variables=['a', 'b'],
    )
    expected_shape = (2, 3)
    self.assertEqual(sample_x.shape, expected_shape)
def test_select_neighbours_10(self):
    """Neighbours drawn from a DataFrame keep their matching ``y`` values.

    Column 'a' and ``y`` are both built from ``range(100)``, so comparing
    the returned 'a' column with the returned labels checks that rows and
    labels were selected together.
    """
    columns = {
        'a': list(range(100)),
        'b': 11,
        'c': np.arange(0, 200, 2) / 7,
    }
    df = pd.DataFrame(columns)
    y = pd.Series(range(100))

    sample_x, sample_y = select_neighbours(df, [3, 11, 7.4], y, n=5)

    expected_shape = (5, 3)
    self.assertEqual(sample_x.shape, expected_shape)
    self.assertEqual(len(sample_y), 5)
    np.testing.assert_array_equal(sample_x['a'], sample_y)
def test_regression_3(self):
    """Profiles over 15 neighbours with explicit splits for 'LSTAT' and 'RM'.

    Expects the profile object to report exactly those two variables and
    to contain ``15 * 7`` rows, 7 being the total number of split values
    given (3 for 'LSTAT' plus 4 for 'RM').
    """
    variable_names = self.variable_names
    n_neighbours = 15

    neighbours = select_neighbours(
        self.X_train,
        self.X_train[0],
        variable_names=variable_names,
        selected_variables=variable_names,
        n=n_neighbours,
    )
    cp3 = individual_variable_profile(
        self.explainer_rf,
        neighbours,
        variables=['LSTAT', 'RM'],
        variable_splits={
            'LSTAT': [10, 20, 30],
            'RM': [4, 5, 6, 7],
        },
    )

    self.assertEqual(cp3.selected_variables, ['LSTAT', 'RM'])
    # num of different values in splits
    expected_rows = n_neighbours * 7
    self.assertEqual(len(cp3.profile), expected_rows)
(gb_model, _, _, _) = gradient_boosting_model() (svm_model, _, _, _) = supported_vector_machines_model() explainer_linear = explain(linear_model, variable_names, data, y) explainer_gb = explain(gb_model, variable_names, data, y) explainer_svm = explain(svm_model, variable_names, data, y) # single profile cp_1 = individual_variable_profile(explainer_gb, x[0], y[0]) plot(cp_1, destination="notebook", selected_variables=["bmi"], print_observations=False) # local fit neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10) cp_2 = individual_variable_profile(explainer_gb, neighbours_x, neighbours_y) plot(cp_2, show_residuals=True, selected_variables=["age"], print_observations=False, color_residuals='red', plot_title='') # aggregate profiles plot(cp_2, aggregate_profiles="mean", selected_variables=["age"], color_pdps='black', size_pdps=6,
def test_select_neighbours_6(self):
    """Requesting 300 neighbours from a DataFrame returns ``len(self.x)`` rows."""
    frame = pd.DataFrame(self.x)
    query_point = np.array([4, 3, 2])
    sample_x = select_neighbours(frame, query_point, n=300)
    self.assertEqual(len(sample_x), len(self.x))
def test_select_neighbours_4(self):
    """Requesting 300 neighbours from ``self.x`` returns ``len(self.x)`` rows."""
    query_point = np.array([4, 3, 2])
    # it logs warning
    sample_x = select_neighbours(self.x, query_point, n=300)
    available_rows = len(self.x)
    self.assertEqual(len(sample_x), available_rows)
if __name__ == "__main__":
    # Demo script: fit a random-forest regression explainer and plot three
    # kinds of profiles (single observation, sample, neighbourhood).
    # NOTE(review): X_train and y_train are not defined in this block;
    # presumably they are module-level names set elsewhere - confirm.
    model, data, labels, variable_names = random_forest_regression()
    explainer_rf = explain(model, variable_names, data, labels)

    # Profile of a single observation.
    cp_profile = individual_variable_profile(
        explainer_rf,
        X_train[0],
        y=y_train[0],
        variables=['TAX', 'CRIM'],
    )
    plot(cp_profile)

    # Profiles for a sample of three observations.
    sample = select_sample(X_train, n=3)
    cp2 = individual_variable_profile(
        explainer_rf,
        sample,
        variables=['TAX', 'CRIM'],
    )
    plot(cp2)

    # Profiles for the 15 neighbours of the first observation,
    # evaluated on explicitly given split points.
    neighbours = select_neighbours(
        X_train,
        X_train[0],
        variable_names=variable_names,
        selected_variables=variable_names,
        n=15,
    )
    cp3 = individual_variable_profile(
        explainer_rf,
        neighbours,
        variables=['LSTAT', 'RM'],
        variable_splits={
            'LSTAT': [10, 20, 30],
            'RM': [4, 5, 6, 7],
        },
    )
    plot(cp3)