예제 #1
0
 def test_select_neighbours_5(self):
     """A wrong ``dist_fun`` value must be rejected with ``ValueError``.

     The string ``'euclidean'`` is passed as the distance function and the
     call is expected to raise.
     """
     # The caught exception is never inspected, so the context manager is
     # not bound to a name.
     with self.assertRaises(ValueError):
         select_neighbours(self.x,
                           np.array([4, 3, 2]),
                           n=1,
                           dist_fun='euclidean')
예제 #2
0
 def test_select_neighbours_8(self):
     """Compare a selection on named columns with a plain, unnamed call.

     The first call names the columns a, b, c and passes
     ``selected_variables=['a', 'd']`` ('d' is not one of those names); the
     second call uses an unnamed frame and no selection.  Both results are
     required to be element-wise equal.
     """
     named_frame = pd.DataFrame(self.x, columns=['a', 'b', 'c'])
     with_selection = select_neighbours(named_frame,
                                        [4, 1, 5],
                                        n=10,
                                        selected_variables=['a', 'd'])

     plain_frame = pd.DataFrame(self.x)
     without_selection = select_neighbours(plain_frame, [4, 1, 5], n=10)

     np.testing.assert_array_equal(with_selection, without_selection)
예제 #3
0
 def test_select_neighbours(self):
     """The single neighbour returned for ``self.x[0]`` equals that row.

     Checked once with the ``euclidean_distances`` callable and once with
     the ``'gower'`` string as ``dist_fun``.
     """
     query = self.x[0]

     # Both selections are computed before anything is asserted.
     by_euclidean = select_neighbours(
         self.x, query, dist_fun=euclidean_distances, n=1)
     by_gower = select_neighbours(
         self.x, query, dist_fun='gower', n=1)

     expected_row = list(query)
     self.assertSequenceEqual(list(by_euclidean.iloc[0]), expected_row)
     self.assertSequenceEqual(list(by_gower.iloc[0]), expected_row)
예제 #4
0
 def test_select_neighbours_2(self):
     """Requesting ``size`` neighbours yields a ``(size, m)`` shaped result.

     ``m`` is the second dimension of ``self.x``.  The check is repeated
     for the ``euclidean_distances`` callable and the ``'gower'`` string.
     """
     size = 3
     _, n_columns = self.x.shape
     expected_shape = (size, n_columns)

     neighbours = select_neighbours(
         self.x,
         np.array([4, 3, 2]),
         dist_fun=euclidean_distances,
         n=size)
     self.assertEqual(neighbours.shape, expected_shape)

     neighbours2 = select_neighbours(
         self.x,
         np.array([4, 3, 2]),
         dist_fun='gower',
         n=size)
     self.assertEqual(neighbours2.shape, expected_shape)
예제 #5
0
 def test_select_neighbours_3(self):
     """Returned rows and labels stay paired when ``y`` is supplied.

     The label at index 1 of the returned ``y`` is located in ``self.y``
     and the row of ``self.x`` at that position must equal the second
     returned row.
     """
     query = np.array([4, 3, 2])
     sample_x, sample_y = select_neighbours(self.x, query, y=self.y, n=3)

     # First position in self.y holding the same label as sample_y[1].
     label = sample_y[1]
     pos = list(self.y).index(label)

     returned_row = list(sample_x.iloc[1])
     source_row = list(self.x[pos])
     self.assertSequenceEqual(returned_row, source_row)
예제 #6
0
 def test_select_neighbours_7(self):
     """Selecting on ['a', 'b'] still returns all three columns.

     Two neighbours are requested from a frame with columns a, b, c, and
     the result is expected to have shape ``(2, 3)``.
     """
     frame = pd.DataFrame(self.x, columns=['a', 'b', 'c'])
     sample_x = select_neighbours(
         frame,
         [4, 1, 5],
         n=2,
         selected_variables=['a', 'b'])

     expected_shape = (2, 3)
     self.assertEqual(sample_x.shape, expected_shape)
예제 #7
0
 def test_select_neighbours_10(self):
     """Neighbours and labels stay aligned for DataFrame / Series input.

     Column 'a' and the label series both hold 0..99, so the returned 'a'
     column must equal the returned labels element-wise.
     """
     n_neighbours = 5

     columns = {
         'a': list(range(100)),
         'b': 11,
         'c': np.arange(0, 200, 2) / 7
     }
     df = pd.DataFrame(columns)
     y = pd.Series(range(100))

     # y is passed positionally, as the third argument.
     sample_x, sample_y = select_neighbours(
         df, [3, 11, 7.4], y, n=n_neighbours)

     self.assertEqual(sample_x.shape, (n_neighbours, 3))
     self.assertEqual(len(sample_y), n_neighbours)
     np.testing.assert_array_equal(sample_x['a'], sample_y)
예제 #8
0
 def test_regression_3(self):
     """Profile 15 neighbours of ``X_train[0]`` with explicit splits.

     Verifies the ``selected_variables`` recorded on the result and that
     the profile length is 15 * 7, where 7 is the total number of split
     values given (3 for LSTAT plus 4 for RM).
     """
     n_neighbours = 15
     splits = {
         'LSTAT': [10, 20, 30],
         'RM': [4, 5, 6, 7]
     }
     # Expected length is fixed up front: neighbours times the number of
     # different values in the splits.
     n_split_values = sum(len(values) for values in splits.values())
     expected_rows = n_neighbours * n_split_values

     names = self.variable_names
     neighbours = select_neighbours(
         self.X_train,
         self.X_train[0],
         variable_names=names,
         selected_variables=names,
         n=n_neighbours)

     cp3 = individual_variable_profile(
         self.explainer_rf,
         neighbours,
         variables=['LSTAT', 'RM'],
         variable_splits=splits)

     self.assertEqual(cp3.selected_variables, ['LSTAT', 'RM'])
     self.assertEqual(len(cp3.profile), expected_rows)
예제 #9
0
    (gb_model, _, _, _) = gradient_boosting_model()
    (svm_model, _, _, _) = supported_vector_machines_model()

    explainer_linear = explain(linear_model, variable_names, data, y)
    explainer_gb = explain(gb_model, variable_names, data, y)
    explainer_svm = explain(svm_model, variable_names, data, y)

    # single profile
    cp_1 = individual_variable_profile(explainer_gb, x[0], y[0])
    plot(cp_1,
         destination="notebook",
         selected_variables=["bmi"],
         print_observations=False)

    # local fit
    neighbours_x, neighbours_y = select_neighbours(x, x[10], y=y, n=10)
    cp_2 = individual_variable_profile(explainer_gb, neighbours_x,
                                       neighbours_y)
    plot(cp_2,
         show_residuals=True,
         selected_variables=["age"],
         print_observations=False,
         color_residuals='red',
         plot_title='')

    # aggregate profiles
    plot(cp_2,
         aggregate_profiles="mean",
         selected_variables=["age"],
         color_pdps='black',
         size_pdps=6,
예제 #10
0
 def test_select_neighbours_6(self):
     """With ``n=300`` on DataFrame input, every row of ``self.x`` comes back.

     NOTE(review): presumably 300 exceeds the number of rows in the
     fixture; confirm against setUp.
     """
     frame = pd.DataFrame(self.x)
     query = np.array([4, 3, 2])
     sample_x = select_neighbours(frame, query, n=300)

     expected_rows = len(self.x)
     self.assertEqual(len(sample_x), expected_rows)
예제 #11
0
 def test_select_neighbours_4(self):
     """With ``n=300`` the result has as many rows as ``self.x``.

     Per the original note, this call logs a warning.
     NOTE(review): presumably 300 exceeds the number of rows in the
     fixture; confirm against setUp.
     """
     query = np.array([4, 3, 2])
     sample_x = select_neighbours(self.x, query, n=300)

     expected_rows = len(self.x)
     self.assertEqual(len(sample_x), expected_rows)
if __name__ == "__main__":
    # Demo script: build an explainer, then plot three kinds of profiles.
    # NOTE(review): X_train / y_train are not defined in this block;
    # presumably they are module-level names - confirm.
    model, data, labels, variable_names = random_forest_regression()
    explainer_rf = explain(model, variable_names, data, labels)

    # 1) Profile of a single observation, with its true label attached.
    cp_profile = individual_variable_profile(
        explainer_rf,
        X_train[0],
        y=y_train[0],
        variables=['TAX', 'CRIM'])
    plot(cp_profile)

    # 2) Profiles for a sample of three observations.
    sample = select_sample(X_train, n=3)
    cp2 = individual_variable_profile(
        explainer_rf,
        sample,
        variables=['TAX', 'CRIM'])
    plot(cp2)

    # 3) Profiles for 15 neighbours of the first observation, evaluated on
    #    explicitly supplied split values.
    neighbours = select_neighbours(
        X_train,
        X_train[0],
        variable_names=variable_names,
        selected_variables=variable_names,
        n=15)
    explicit_splits = {
        'LSTAT': [10, 20, 30],
        'RM': [4, 5, 6, 7]
    }
    cp3 = individual_variable_profile(
        explainer_rf,
        neighbours,
        variables=['LSTAT', 'RM'],
        variable_splits=explicit_splits)
    plot(cp3)