def test_indexname(self):
    # The index of the returned effect table must carry the feature name.
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    index_label = effects.index.name
    self.assertEqual(index_label, "x1")
def test_1D_continuous_rug_plot(self):
    # Checks the rug (second line on the axes) drawn for the cleaned data.
    data = self.X_cleaned
    effects = aleplot_1D_continuous(
        X=data,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
    )
    _fig, ax = plot_1D_continuous_eff(effects, data)

    # the rug is the second line object of the axes
    rug_xy = ax.lines[1].get_xydata()
    rug_x = rug_xy[:, 0]
    rug_y = rug_xy[:, 1]

    # one rug mark per data point in X
    self.assertEqual(rug_xy.shape[0], data.shape[0])

    # every mark sits at the lowest value found in any column other than
    # "size" (i.e. the effect and the confidence interval bounds)
    lowest_value = effects.drop("size", axis=1).min().min()
    self.assertCountEqual(np.unique(rug_y), [lowest_value])

    # every mark must lie strictly inside the bin its observation belongs
    # to: below the upper bin limit and above the lower bin limit
    bin_edges = effects.index
    bin_codes = pd.cut(data["x1"], bin_edges, include_lowest=True).cat.codes
    below_upper_limit = np.all(bin_edges[bin_codes + 1] > rug_x)
    above_lower_limit = np.all(bin_edges[bin_codes] < rug_x)
    self.assertTrue(below_upper_limit and above_lower_limit)
def test_binsizes(self):
    # The "size" column must hold these bin counts (order is not checked).
    expected_sizes = [0.0, 40.0, 40.0, 40.0, 40.0, 40.0]
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    observed_sizes = effects.loc[:, "size"]
    self.assertCountEqual(observed_sizes, expected_sizes)
def test_outputshape_noCI(self):
    # Without a confidence interval the table has 6 rows and 2 columns.
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    expected_columns = ["eff", "size"]
    self.assertEqual(effects.shape, (6, 2))
    self.assertCountEqual(effects.columns, expected_columns)
def test_outputshape_withCI(self):
    # With C=0.9 two extra columns labelled "..._90%" are expected.
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
        C=0.9,
    )
    expected_columns = ["eff", "size", "lowerCI_90%", "upperCI_90%"]
    self.assertEqual(effects.shape, (6, 4))
    self.assertCountEqual(effects.columns, expected_columns)
def test_effvalues(self):
    # Effect values, rounded to 8 decimals, compared regardless of order.
    expected_effects = [
        -0.35570033,
        -0.16996644,
        -0.19291121,
        0.10414799,
        0.24730329,
        0.37855307,
    ]
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    rounded_effects = np.round(effects.loc[:, "eff"], 8)
    self.assertCountEqual(rounded_effects, expected_effects)
def test_1D_continuous_line_plot(self):
    # Checks the effect line (first line on the axes) against the table.
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
    )
    _fig, ax = plot_1D_continuous_eff(effects, self.X)

    # the effect line is the first line object of the axes
    line_xy = ax.lines[0].get_xydata()
    line_x = line_xy[:, 0]
    line_y = line_xy[:, 1]

    # the x values should be the bins
    self.assertCountEqual(line_x, effects.index)
    # the y values should be the effect
    self.assertCountEqual(line_y, effects.eff)
def test_effvalues(self):
    # Effect values on the cleaned data, rounded to 8 decimals and compared
    # regardless of order.
    expected_effects = [
        -0.3302859,
        -0.25946135,
        -0.03809224,
        0.03292833,
        0.27153761,
        0.3164612,
    ]
    effects = aleplot_1D_continuous(
        X=self.X_cleaned,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    rounded_effects = np.round(effects.loc[:, "eff"], 8)
    self.assertCountEqual(rounded_effects, expected_effects)
def test_1D_continuous_ci_plot(self):
    # Compares the first drawn collection with the 95% CI columns.
    ci_columns = ['lowerCI_95%', 'upperCI_95%']
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
    )
    _fig, ax = plot_1D_continuous_eff(effects, self.X)

    # vertices of the first path of the first collection on the axes
    vertices = ax.collections[0].get_paths()[0].vertices
    # per x value (column 0), keep the smallest and largest drawn y value
    drawn_ci = (
        pd.DataFrame(vertices)
        .drop_duplicates()
        .groupby(0)
        .agg(['min', 'max'])
    )
    drawn_ci.index.name = 'x1'
    drawn_ci.columns = ci_columns

    # the first row of the table is skipped in the comparison
    expected_ci = effects.loc[effects.index[1]:, ci_columns]
    self.assertTrue(np.all(expected_ci == drawn_ci))
def test_bins(self):
    # The index of the effect table must contain exactly these bin limits.
    expected_bins = [
        0.0013107121819164735,
        0.21205399821897986,
        0.3905585553320686,
        0.5561380185409515,
        0.7797798975036754,
        0.9986526271693825,
    ]
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=False,
    )
    self.assertCountEqual(effects.index, expected_bins)
def test_1D_continuous_ci_plot(self):
    # Compares the first drawn collection with the 95% CI columns, using
    # the cleaned data.
    ci_columns = ["lowerCI_95%", "upperCI_95%"]
    data = self.X_cleaned
    effects = aleplot_1D_continuous(
        X=data,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
    )
    _fig, ax = plot_1D_continuous_eff(effects, data)

    # vertices of the first path of the first collection on the axes
    vertices = ax.collections[0].get_paths()[0].vertices
    # per x value (column 0), keep the smallest and largest drawn y value
    drawn_ci = (
        pd.DataFrame(vertices)
        .drop_duplicates()
        .groupby(0)
        .agg(["min", "max"])
    )
    drawn_ci.index.name = "x1"
    drawn_ci.columns = ci_columns

    # the first row of the table is skipped in the comparison
    expected_ci = effects.loc[effects.index[1]:, ci_columns]
    self.assertTrue(np.all(expected_ci == drawn_ci))
def test_CIvalues(self):
    # Checks the 90% confidence interval columns: NaN in the first row,
    # fixed reference values (rounded to 8 decimals) in the others.
    expected_bounds = {
        "lowerCI_90%": [
            -0.21966029, -0.27471201, -0.01534647, 0.20038572, 0.30378132,
        ],
        "upperCI_90%": [
            -0.12027259, -0.11111041, 0.22364245, 0.29422086, 0.45332483,
        ],
    }
    effects = aleplot_1D_continuous(
        X=self.X,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
        C=0.9,
    )
    first_bin = effects.index[0]
    second_bin = effects.index[1]

    # assert that the first bin does not have a CI
    for column in expected_bounds:
        self.assertTrue(np.isnan(effects.loc[first_bin, column]))

    # check the values of the CI (lower bound first, then upper bound)
    for column, reference in expected_bounds.items():
        rounded_bounds = np.round(effects.loc[second_bin:, column], 8)
        self.assertCountEqual(rounded_bounds, reference)
def test_CIvalues(self):
    # Checks the 90% confidence interval columns on the cleaned data: NaN in
    # the first row, fixed reference values (rounded to 8 decimals) in the
    # others.
    expected_bounds = {
        "lowerCI_90%": [
            -0.37210104, -0.08077478, -0.00175768, 0.20772107, 0.24621853,
        ],
        "upperCI_90%": [
            -0.14682166, 0.00459031, 0.06761434, 0.33535415, 0.38670386,
        ],
    }
    effects = aleplot_1D_continuous(
        X=self.X_cleaned,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
        C=0.9,
    )
    first_bin = effects.index[0]
    second_bin = effects.index[1]

    # assert that the first bin does not have a CI
    for column in expected_bounds:
        self.assertTrue(np.isnan(effects.loc[first_bin, column]))

    # check the values of the CI (lower bound first, then upper bound)
    for column, reference in expected_bounds.items():
        rounded_bounds = np.round(effects.loc[second_bin:, column], 8)
        self.assertCountEqual(rounded_bounds, reference)
def test_1D_continuous_rug_plot(self):
    # Checks the rug (second line on the axes) drawn for self.X.
    data = self.X
    effects = aleplot_1D_continuous(
        X=data,
        model=self.model,
        feature="x1",
        grid_size=5,
        include_CI=True,
    )
    _fig, ax = plot_1D_continuous_eff(effects, data)

    # the rug is the second line object of the axes
    rug_xy = ax.lines[1].get_xydata()
    rug_x = rug_xy[:, 0]
    rug_y = rug_xy[:, 1]

    # one rug mark per data point in X
    self.assertEqual(rug_xy.shape[0], data.shape[0])

    # every mark sits at the lowest effect value
    lowest_effect = effects.eff.min()
    self.assertCountEqual(np.unique(rug_y), [lowest_effect])

    # every mark must lie strictly inside the bin its observation belongs
    # to: below the upper bin limit and above the lower bin limit
    bin_edges = effects.index
    bin_codes = pd.cut(data["x1"], bin_edges, include_lowest=True).cat.codes
    below_upper_limit = np.all(bin_edges[bin_codes + 1] > rug_x)
    above_lower_limit = np.all(bin_edges[bin_codes] < rug_x)
    self.assertTrue(below_upper_limit and above_lower_limit)
def test_exceptions(self):
    # The call is expected to raise, and the first exception argument must
    # be the "model/dataset not compatible" message below.
    expected_message = "Please check that your model is fitted, and accepts X as input."

    # dataset should be compatible with the model
    with self.assertRaises(Exception) as raised:
        aleplot_1D_continuous(self.X, self.model, "x1")

    actual_message = raised.exception.args[0]
    self.assertEqual(actual_message, expected_message)