def test_ice_binary(self, titanic_data, titanic_model, titanic_features):
    """Check ``_calc_ice_lines`` on the binary feature ``Sex`` at grid value 0.

    Six sampled rows of the returned grid results are compared against
    hard-coded reference values, then the returned data is checked to hold
    only the grid value in its ``Sex`` column.
    """
    sample_rows = [0, 150, 300, 450, 600, 750]
    reference_values = [
        0.527877688407898,
        0.8060303926467896,
        0.7201764583587646,
        0.9118255376815796,
        0.8612496852874756,
        0.9331579208374023,
    ]

    grid_results, _data = _calc_ice_lines(
        feature_grid=0,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Sex",
        feature_type="binary",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    # The expected frame has a single column keyed by the grid value (0),
    # indexed by the sampled row labels.
    expected = pd.DataFrame({0: dict(zip(sample_rows, reference_values))})
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)

    assert_array_equal(_data["Sex"].unique(), np.array([0]))
def test_ice_predict_kwds(self, titanic_data, titanic_model, titanic_features):
    """Check ``_calc_ice_lines`` when extra ``predict_kwds`` are supplied.

    Calls with ``predict_kwds={'ntree_limit': 10}`` for the binary feature
    ``Sex`` at grid value 0 and compares six sampled rows of the grid results
    against hard-coded reference values.
    """
    sample_rows = [0, 150, 300, 450, 600, 750]
    reference_values = [
        0.5039686560630798,
        0.6007370352745056,
        0.5556174516677856,
        0.643494725227356,
        0.643494725227356,
        0.65798020362854,
    ]

    # The second returned item is not inspected by this test.
    grid_results, _ = _calc_ice_lines(
        feature_grid=0,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Sex",
        feature_type="binary",
        predict_kwds={"ntree_limit": 10},
        data_transformer=None,
        unit_test=True,
    )

    expected = pd.DataFrame({0: dict(zip(sample_rows, reference_values))})
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)
def test_ice_numeric(self, titanic_data, titanic_model, titanic_features):
    """Check ``_calc_ice_lines`` on the numeric feature ``Fare`` at grid value 10.

    Six sampled rows of the returned grid results are compared against
    hard-coded reference values, then the returned data is checked to hold
    only the grid value in its ``Fare`` column.
    """
    sample_rows = [0, 150, 300, 450, 600, 750]
    reference_values = [
        0.10624270886182785,
        0.09951823949813843,
        0.6190056204795837,
        0.16398519277572632,
        0.7467048764228821,
        0.868721067905426,
    ]

    grid_results, _data = _calc_ice_lines(
        feature_grid=10,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Fare",
        feature_type="numeric",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    # Single expected column keyed by the grid value (10).
    expected = pd.DataFrame({10: dict(zip(sample_rows, reference_values))})
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)

    assert_array_equal(_data["Fare"].unique(), np.array([10]))
def test_ice_onehot(self, titanic_data, titanic_model, titanic_features):
    """Check ``_calc_ice_lines`` on the one-hot ``Embarked_*`` feature group.

    The grid value is the column name ``Embarked_C``. Six sampled rows of the
    grid results are compared against hard-coded reference values, then the
    column means of the three ``Embarked_*`` columns in the returned data are
    checked to equal ``[1, 0, 0]``.
    """
    embarked_columns = ("Embarked_C", "Embarked_S", "Embarked_Q")
    sample_rows = [0, 150, 300, 450, 600, 750]
    reference_values = [
        0.19760717451572418,
        0.11059149354696274,
        0.7139607667922974,
        0.2575017809867859,
        0.9045996069908142,
        0.9531968832015991,
    ]

    grid_results, _data = _calc_ice_lines(
        feature_grid="Embarked_C",
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature=list(embarked_columns),
        feature_type="onehot",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    expected = pd.DataFrame(
        {"Embarked_C": dict(zip(sample_rows, reference_values))}
    )
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)

    # Means are taken in the order C, S, Q to line up with [1, 0, 0].
    column_means = _data[list(embarked_columns)].mean().values
    assert_array_equal(column_means, np.array([1, 0, 0]))
def test_ice_data_transformer(self, titanic_data, titanic_model, titanic_features):
    """Check ``_calc_ice_lines`` when a ``data_transformer`` callback is given.

    The callback overwrites ``Fare`` with 10, 20 or 30 depending on which
    ``Embarked_*`` column equals 1. With grid value ``Embarked_C``, six
    sampled rows of the grid results are compared against hard-coded reference
    values, and ``Fare`` in the returned data is expected to be 10.0 only.
    """

    def embark_change(df):
        # Assign in C, S, Q order; the frame is modified in place and returned.
        fare_by_column = (
            ("Embarked_C", 10),
            ("Embarked_S", 20),
            ("Embarked_Q", 30),
        )
        for column, fare in fare_by_column:
            df.loc[df[column] == 1, "Fare"] = fare
        return df

    sample_rows = [0, 150, 300, 450, 600, 750]
    reference_values = [
        0.20869030058383942,
        0.10480280220508575,
        0.6179739832878113,
        0.18637187778949738,
        0.8106594085693359,
        0.8973860740661621,
    ]

    grid_results, _data = _calc_ice_lines(
        feature_grid="Embarked_C",
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature=["Embarked_C", "Embarked_S", "Embarked_Q"],
        feature_type="onehot",
        predict_kwds={},
        data_transformer=embark_change,
        unit_test=True,
    )

    expected = pd.DataFrame(
        {"Embarked_C": dict(zip(sample_rows, reference_values))}
    )
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)

    assert_array_equal(_data["Fare"].unique(), np.array([10.0]))
def test_calc_ice_lines_regression(ross_data, ross_model, ross_features):
    """Check ``_calc_ice_lines`` for a regression model (``n_classes=0``).

    Uses the binary feature ``SchoolHoliday`` at grid value 1. First checks
    that the returned data holds only the grid value in that column, then
    compares nine sampled rows of the grid results against hard-coded
    reference values.
    """
    grid_results, _data = _calc_ice_lines(
        feature_grid=1,
        data=ross_data,
        model=ross_model,
        model_features=ross_features,
        n_classes=0,
        feature="SchoolHoliday",
        feature_type="binary",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    assert_array_equal(_data["SchoolHoliday"].unique(), np.array([1]))

    # Every 100000th row, from 0 up to and including 800000.
    sample_rows = list(range(0, 900000, 100000))
    reference_values = [
        8802.910080560769,
        8293.287914628107,
        5352.321273982288,
        5412.1717528683475,
        7933.070072150073,
        7520.956055932758,
        5493.134809064146,
        5528.43699339258,
        4877.434213535265,
    ]
    expected = pd.DataFrame({1: dict(zip(sample_rows, reference_values))})
    sampled = grid_results.iloc[sample_rows]
    assert_frame_equal(sampled, expected, check_like=True, check_dtype=False)
def test_calc_ice_lines_regression(ross_data, ross_model, ross_features):
    """Check the data returned by ``_calc_ice_lines`` for a regression model.

    Uses the binary feature ``SchoolHoliday`` at grid value 1 with
    ``n_classes=0`` and verifies that the returned data holds only the grid
    value in that column. The grid results themselves are not inspected.
    """
    call_kwargs = dict(
        feature_grid=1,
        data=ross_data,
        model=ross_model,
        model_features=ross_features,
        n_classes=0,
        feature="SchoolHoliday",
        feature_type="binary",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )
    _grid_results, _data = _calc_ice_lines(**call_kwargs)

    observed = _data["SchoolHoliday"].unique()
    assert_array_equal(observed, np.array([1]))
def test_ice_predict_kwds(self, titanic_data, titanic_model, titanic_features):
    """Smoke-test ``_calc_ice_lines`` with extra ``predict_kwds``.

    Calls with ``predict_kwds={"ntree_limit": 10}`` for the binary feature
    ``Sex`` at grid value 0. No values are asserted; the test only requires
    that the call completes and unpacks into exactly two items.
    """
    call_kwargs = dict(
        feature_grid=0,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Sex",
        feature_type="binary",
        predict_kwds={"ntree_limit": 10},
        data_transformer=None,
        unit_test=True,
    )
    # Unpacking is kept so a result of the wrong length still fails the test.
    _grid_results, _ = _calc_ice_lines(**call_kwargs)
def test_ice_numeric(self, titanic_data, titanic_model, titanic_features):
    """Check the data returned by ``_calc_ice_lines`` for numeric ``Fare``.

    Uses grid value 10 and verifies that the returned data holds only that
    value in its ``Fare`` column. The grid results are not inspected.
    """
    call_kwargs = dict(
        feature_grid=10,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Fare",
        feature_type="numeric",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )
    _grid_results, _data = _calc_ice_lines(**call_kwargs)

    observed = _data["Fare"].unique()
    assert_array_equal(observed, np.array([10]))
def test_ice_binary(self, titanic_data, titanic_model, titanic_features):
    """Check the data returned by ``_calc_ice_lines`` for binary ``Sex``.

    Uses grid value 0 and verifies that the returned data holds only that
    value in its ``Sex`` column. The grid results are not inspected.
    """
    call_kwargs = dict(
        feature_grid=0,
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature="Sex",
        feature_type="binary",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )
    _grid_results, _data = _calc_ice_lines(**call_kwargs)

    observed = _data["Sex"].unique()
    assert_array_equal(observed, np.array([0]))
def test_calc_ice_lines_multiclass(otto_data, otto_model, otto_features):
    """Check ``_calc_ice_lines`` for a nine-class model.

    Uses the numeric feature ``feat_67`` at grid value 1 with ``n_classes=9``.
    Verifies that the grid results have length 9 and that the returned data
    holds only the grid value in its ``feat_67`` column.
    """
    call_kwargs = dict(
        feature_grid=1,
        data=otto_data,
        model=otto_model,
        model_features=otto_features,
        n_classes=9,
        feature="feat_67",
        feature_type="numeric",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )
    grid_results, _data = _calc_ice_lines(**call_kwargs)

    assert len(grid_results) == 9

    observed = _data["feat_67"].unique()
    assert_array_equal(observed, np.array([1]))
def test_ice_onehot(self, titanic_data, titanic_model, titanic_features):
    """Check the data returned by ``_calc_ice_lines`` for one-hot ``Embarked_*``.

    The grid value is the column name ``Embarked_C``. Verifies that the column
    means of the three ``Embarked_*`` columns in the returned data equal
    ``[1, 0, 0]``. The grid results are not inspected.
    """
    embarked_columns = ("Embarked_C", "Embarked_S", "Embarked_Q")

    _grid_results, _data = _calc_ice_lines(
        feature_grid="Embarked_C",
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature=list(embarked_columns),
        feature_type="onehot",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    # Means are taken in the order C, S, Q to line up with [1, 0, 0].
    column_means = _data[list(embarked_columns)].mean().values
    assert_array_equal(column_means, np.array([1, 0, 0]))
def test_calc_ice_lines_multiclass(otto_data, otto_model, otto_features):
    """Check ``_calc_ice_lines`` for a nine-class model, including values.

    Uses the numeric feature ``feat_67`` at grid value 1 with ``n_classes=9``.
    Verifies that the grid results have length 9, that the returned data holds
    only the grid value in ``feat_67``, and that seven sampled rows of the
    results for classes 0, 3 and 7 match hard-coded reference values.
    """
    grid_results, _data = _calc_ice_lines(
        feature_grid=1,
        data=otto_data,
        model=otto_model,
        model_features=otto_features,
        n_classes=9,
        feature="feat_67",
        feature_type="numeric",
        predict_kwds={},
        data_transformer=None,
        unit_test=True,
    )

    assert len(grid_results) == 9
    assert_array_equal(_data["feat_67"].unique(), np.array([1]))

    # Every 10000th row, from 0 up to and including 60000.
    sample_rows = list(range(0, 70000, 10000))

    # Reference values per class index, checked in the order 0, 3, 7.
    reference_by_class = (
        (0, [0.56, 0.0, 0.0, 0.01, 0.01, 0.03, 0.04]),
        (3, [0.0, 0.04, 0.03, 0.02, 0.0, 0.0, 0.0]),
        (7, [0.0, 0.03, 0.01, 0.02, 0.03, 0.9, 0.02]),
    )
    for class_idx, reference_values in reference_by_class:
        expected = pd.DataFrame({1: dict(zip(sample_rows, reference_values))})
        sampled = grid_results[class_idx].iloc[sample_rows]
        assert_frame_equal(
            sampled, expected, check_like=True, check_dtype=False
        )
def test_ice_data_transformer(self, titanic_data, titanic_model, titanic_features):
    """Check the data returned by ``_calc_ice_lines`` with a ``data_transformer``.

    The callback overwrites ``Fare`` with 10, 20 or 30 depending on which
    ``Embarked_*`` column equals 1. With grid value ``Embarked_C``, ``Fare``
    in the returned data is expected to be 10.0 only. The grid results are
    not inspected.
    """

    def embark_change(df):
        # Assign in C, S, Q order; the frame is modified in place and returned.
        fare_by_column = (
            ("Embarked_C", 10),
            ("Embarked_S", 20),
            ("Embarked_Q", 30),
        )
        for column, fare in fare_by_column:
            df.loc[df[column] == 1, "Fare"] = fare
        return df

    _grid_results, _data = _calc_ice_lines(
        feature_grid="Embarked_C",
        data=titanic_data,
        model=titanic_model,
        model_features=titanic_features,
        n_classes=2,
        feature=["Embarked_C", "Embarked_S", "Embarked_Q"],
        feature_type="onehot",
        predict_kwds={},
        data_transformer=embark_change,
        unit_test=True,
    )

    observed = _data["Fare"].unique()
    assert_array_equal(observed, np.array([10.0]))