def test_load_smartpredictor_1(self):
    """
    Unit test load_smartpredictor 1

    Builds a SmartPredictor from a freshly compiled SmartExplainer, loads
    the reference pickle matching the running Python version and checks
    that both objects carry the same attribute names.

    Raises
    ------
    NotImplementedError
        If no reference pickle is listed for the running Python version.
    """
    xpl = SmartExplainer(features_dict={})
    y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
    dataframe_x = pd.DataFrame([[1, 2, 4], [1, 2, 3]])
    clf = cb.CatBoostClassifier(n_estimators=1).fit(dataframe_x, y_pred)
    xpl.compile(x=dataframe_x, y_pred=y_pred, model=clf)
    predictor = xpl.to_smartpredictor()

    # Reference pickles are version specific. Compare on version_info
    # instead of slicing sys.version, which reads "3.1" on Python 3.10+.
    pickle_by_version = {
        (3, 7): 'data/predictor_to_load_37.pkl',
        (3, 6): 'data/predictor_to_load_36.pkl',
    }
    running_version = tuple(sys.version_info[:2])
    if running_version not in pickle_by_version:
        # Fail with an explicit message rather than an unbound pkl_file.
        raise NotImplementedError(
            "No reference SmartPredictor pickle for Python "
            f"{running_version[0]}.{running_version[1]}"
        )

    current = Path(path.abspath(__file__)).parent.parent.parent
    pkl_file = path.join(current, pickle_by_version[running_version])
    predictor2 = load_smartpredictor(pkl_file)

    attrib_predictor = list(predictor.__dict__)
    attrib_predictor2 = list(predictor2.__dict__)

    # Attribute names must match in both directions.
    assert all(attrib in attrib_predictor2 for attrib in attrib_predictor)
    assert all(attrib2 in attrib_predictor for attrib2 in attrib_predictor2)
def test_display_dataset_analysis_3(self, mock_correlation_matrix):
    """
    Test display_dataset_analysis when there are only categorical features

    Parameters
    ----------
    mock_correlation_matrix : mock object
        Mock supplied to the test; its call_count must still be 0 after
        the dataset analysis has been displayed.
    """
    feature_cols = ['x1', 'x2']

    data = self.df.copy()
    data['x1'] = 'a'
    data['x2'] = data['x2'].astype(str)

    # Ordinal-encode both features before fitting the model
    ordinal_encoder = OrdinalEncoder(
        cols=['x1', 'x2'],
        handle_unknown='ignore',
        return_df=True,
    )
    ordinal_encoder = ordinal_encoder.fit(data)
    data = ordinal_encoder.transform(data)

    model = cb.CatBoostClassifier(n_estimators=1).fit(data[feature_cols], data['y'])

    explainer = SmartExplainer()
    explainer.compile(model=model, x=data[feature_cols])

    metadata_file = os.path.join(current_path, '../../data/metadata.yaml')
    project_report = ProjectReport(
        explainer=explainer,
        project_info_file=metadata_file,
        x_train=data[feature_cols],
    )
    project_report.display_dataset_analysis()

    self.assertEqual(mock_correlation_matrix.call_count, 0)
def compile_shapash_model(x, model):
    """
    Create a SmartExplainer and compile it on the given data and model

    Parameters
    ----------
    x : object
        Dataset forwarded to SmartExplainer.compile as ``x``.
    model : object
        Model forwarded to SmartExplainer.compile as ``model``.

    Returns
    -------
    SmartExplainer
        The explainer, after compile has been called on it.
    """
    explainer = SmartExplainer()
    explainer.compile(x=x, model=model)
    return explainer
def test_compile_0(self, mock_apply_preprocessing, mock_choose_state):
    """
    Unit test compile

    Compiles with user-supplied contributions while the state selection
    and the preprocessing step are replaced by mocks, then checks the
    attributes set on the explainer and that each mock was called.

    Parameters
    ----------
    mock_apply_preprocessing : mock object
        Mock whose return value is set to the contributions frame.
    mock_choose_state : mock object
        Mock whose return value is the Mock used as explainer state.
    """
    contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
    x_pred = pd.DataFrame([[1, 2, 3], [1, 2, 3]])

    # Minimal model: a bare callable carrying only a bound predict method
    model = lambda: None
    model.predict = types.MethodType(self.predict, model)

    xpl = SmartExplainer()

    # Wire the mocked state and preprocessing
    mock_state = Mock()
    mock_state.rank_contributions.return_value = (1, 2, 3)
    mock_state.validate_contributions.return_value = contributions
    mock_choose_state.return_value = mock_state
    mock_apply_preprocessing.return_value = contributions

    xpl.compile(x=x_pred, model=model, contributions=contributions)

    assert hasattr(xpl, 'state')
    assert xpl.state == mock_state
    assert hasattr(xpl, 'x_pred')
    pd.testing.assert_frame_equal(xpl.x_pred, x_pred)
    assert hasattr(xpl, 'contributions')
    pd.testing.assert_frame_equal(xpl.contributions, contributions)

    mock_choose_state.assert_called()
    mock_state.validate_contributions.assert_called()
    mock_apply_preprocessing.assert_called()
    mock_state.rank_contributions.assert_called()

    assert xpl._case == "regression"
def test_compile_3(self):
    """
    Unit test compile 3
    checking compile raises ValueError when it is given a model, an
    explainer and contributions in the same call
    """
    frame = pd.DataFrame(range(0, 21), columns=['id'])
    frame['y'] = frame['id'].apply(lambda value: 1 if value < 10 else 0)
    n_rows = frame.shape[0]
    frame['x1'] = np.random.randint(1, 123, n_rows)
    frame['x2'] = np.random.randint(1, 3, n_rows)
    frame = frame.set_index('id')

    model = cb.CatBoostClassifier(n_estimators=1).fit(frame[['x1', 'x2']], frame['y'])
    tree_explainer = shap.TreeExplainer(model)

    contribution_cols = [
        'contribution_0',
        'contribution_1',
        'contribution_2',
        'contribution_3',
    ]
    contrib = pd.DataFrame(
        [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
        columns=contribution_cols,
        index=[0, 1, 2],
    )

    explainer = SmartExplainer()
    with self.assertRaises(ValueError):
        explainer.compile(
            model=model,
            x=frame[['x1', 'x2']],
            explainer=tree_explainer,
            contributions=contrib,
        )
def test_check_y_pred_4(self):
    """
    Unit test check y pred 4
    check_y_pred must raise AttributeError when y_pred is a plain list
    """
    explainer = SmartExplainer()
    explainer.y_pred = [0, 1]

    # Resolve the method first so only the call itself is under the check
    check = explainer.check_y_pred
    with self.assertRaises(AttributeError):
        check()
def test_check_label_dict_2(self):
    """
    Unit test check label dict 2
    With the case set to regression, check_label_dict must not raise
    """
    explainer = SmartExplainer()
    explainer._case = 'regression'

    # No assertion: the call completing without an exception is the test
    explainer.check_label_dict()
def test_check_features_dict_1(self):
    """
    Unit test check features dict 1
    After check_features_dict, a feature given at construction keeps its
    label and a feature only present in columns_dict maps to its own name
    """
    explainer = SmartExplainer(features_dict={'Age': 'Age (Years Old)'})
    explainer.columns_dict = {0: 'Age', 1: 'Education', 2: 'Sex'}

    explainer.check_features_dict()

    age_label = explainer.features_dict['Age']
    assert age_label == 'Age (Years Old)'
    education_label = explainer.features_dict['Education']
    assert education_label == 'Education'
def test_add_3(self):
    """
    Unit test add 3
    After add(features_dict=...), the supplied label is stored and a
    feature only present in columns_dict maps to its own name
    """
    explainer = SmartExplainer()
    explainer.columns_dict = {0: 'Age', 1: 'Education', 2: 'Sex'}

    explainer.add(features_dict={'Age': 'Age (Years Old)'})

    age_label = explainer.features_dict['Age']
    assert age_label == 'Age (Years Old)'
    education_label = explainer.features_dict['Education']
    assert education_label == 'Education'
def test_apply_preprocessing_1(self):
    """
    Unit test apply preprocessing 1
    Called without a preprocessing argument, apply_preprocessing must
    return a list equal to the contributions it was given
    """
    explainer = SmartExplainer()
    raw_contributions = [1, 2, 3]

    result = explainer.apply_preprocessing(raw_contributions)

    self.assertListEqual(result, raw_contributions)
def test_check_features_desc_1(self):
    """
    Unit test check features desc 1

    check_features_desc must return, for each column of x_pred, the
    count listed in ``expected``. ``col2`` holds a missing value and is
    expected to count 2.
    """
    xpl = SmartExplainer()
    xpl.x_pred = pd.DataFrame(
        [
            [0.12, 0, 13, 1],
            [0.13, 1, 14, 1],
            [0.14, 1, 15, 1],
            # np.nan, not np.NaN: the capitalised alias is gone in NumPy 2.0
            [0.15, np.nan, 13, 1],
        ],
        columns=['col1', 'col2', 'col3', 'col4'],
    )
    expected = {'col1': 4, 'col2': 2, 'col3': 3, 'col4': 1}

    assert xpl.check_features_desc() == expected
def test_apply_preprocessing_2(self):
    """
    Unit test apply preprocessing 2
    When a preprocessing object is passed, apply_preprocessing must call
    inverse_transform_contributions on the explainer state
    """
    explainer = SmartExplainer()
    explainer.state = Mock()
    fake_preprocessing = Mock()

    explainer.apply_preprocessing([1, 2, 3], fake_preprocessing)

    explainer.state.inverse_transform_contributions.assert_called()
def test_validate_contributions_1(self):
    """
    Unit test validate contributions 1

    In the classification case, a list of numpy arrays passed to
    validate_contributions must come back as a list of DataFrames that
    carry the columns and index of x_init.
    """
    xpl = SmartExplainer()
    contributions = [
        np.array([[2, 1], [8, 4]]),
        np.array([[5, 5], [0, 0]])
    ]

    # Mocked two-class model using the test case's predict helpers
    model = Mock()
    model._classes = np.array([1, 3])
    model.predict = types.MethodType(self.predict, model)
    model.predict_proba = types.MethodType(self.predict_proba, model)

    xpl.model = model
    xpl._case = "classification"
    xpl._classes = list(model._classes)
    xpl.state = xpl.choose_state(contributions)
    xpl.x_init = pd.DataFrame(
        [[1, 2], [3, 4]],
        columns=['Col1', 'Col2'],
        index=['Id1', 'Id2']
    )

    expected_output = [
        pd.DataFrame(
            [[2, 1], [8, 4]],
            columns=['Col1', 'Col2'],
            index=['Id1', 'Id2']
        ),
        pd.DataFrame(
            [[5, 5], [0, 0]],
            columns=['Col1', 'Col2'],
            index=['Id1', 'Id2']
        )
    ]

    output = xpl.validate_contributions(contributions)

    assert len(expected_output) == len(output)
    # assert_frame_equal raises on mismatch, so a plain loop is enough
    for expected_frame, actual_frame in zip(expected_output, output):
        pd.testing.assert_frame_equal(expected_frame, actual_frame)
def test_compute_features_import_2(self):
    """
    Unit test compute_features_import 2
    Checking classification case: one features importance Series per
    contributions frame, each being the normalised sum of absolute
    contributions sorted in ascending order
    """
    def expected_importance(frame):
        # Sum of absolute contributions per column, ascending, scaled to 1
        totals = frame.abs().sum().sort_values(ascending=True)
        return totals / totals.sum()

    first_contrib = pd.DataFrame(
        [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
        columns=['contribution_0', 'contribution_1', 'contribution_2', 'contribution_3'],
        index=[0, 1, 2],
    )
    second_contrib = pd.DataFrame(
        [[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]],
        columns=['contribution_0', 'contribution_1', 'contribution_2', 'contribution_3'],
        index=[0, 1, 2],
    )
    all_contributions = [first_contrib, second_contrib]

    explainer = SmartExplainer()
    explainer.features_imp = None
    explainer.contributions = all_contributions
    explainer.state = explainer.choose_state(all_contributions)
    explainer._case = "classification"

    explainer.compute_features_import()

    assert expected_importance(first_contrib).equals(explainer.features_imp[0])
    assert expected_importance(second_contrib).equals(explainer.features_imp[1])
def test_choose_state_2(self, mock_multi_decorator):
    """
    Unit test choose state 2

    Parameters
    ----------
    mock_multi_decorator : mock object
        Mock supplied to the test; it must have been called once
        choose_state has been run on a list.
    """
    explainer = SmartExplainer()
    list_contributions = [1, 2, 3]

    explainer.choose_state(list_contributions)

    mock_multi_decorator.assert_called()
def test_check_features_name_4(self):
    """
    Unit test check features name 4
    With columns_dict and features_dict both None, a list of integers
    must be returned with the same values
    """
    explainer = SmartExplainer()
    explainer.columns_dict = None
    explainer.features_dict = None
    requested = [1, 2, 4]

    result = explainer.check_features_name(requested)

    np.testing.assert_array_equal(result, requested)
def setUp(self):
    """
    Build the fixtures stored on the test case: a 21-row frame indexed by
    id (``self.df``) and a SmartExplainer compiled on its x1/x2 columns
    with a CatBoostClassifier (``self.xpl``)
    """
    frame = pd.DataFrame(range(0, 21), columns=['id'])
    frame['y'] = frame['id'].apply(lambda value: 1 if value < 10 else 0)
    n_rows = frame.shape[0]
    # Random features: x1 then x2, drawn in this order
    frame['x1'] = np.random.randint(1, 123, n_rows)
    frame['x2'] = np.random.randint(1, 3, n_rows)
    frame = frame.set_index('id')

    classifier = cb.CatBoostClassifier(n_estimators=1).fit(frame[['x1', 'x2']], frame['y'])

    self.xpl = SmartExplainer()
    self.xpl.compile(model=classifier, x=frame[['x1', 'x2']])
    self.df = frame
def test_choose_state_1(self, mock_smart_state):
    """
    Unit test choose state 1

    Parameters
    ----------
    mock_smart_state : mock object
        Mock supplied to the test; it must have been called once
        choose_state has been run on a non-list value.
    """
    explainer = SmartExplainer()

    explainer.choose_state('contributions')

    mock_smart_state.assert_called()
def test_check_model_1(self):
    """
    Unit test check model 1
    A model exposing only predict must be reported as a regression case
    with no classes
    """
    # Bare callable carrying only a bound predict method
    model = lambda: None
    model.predict = types.MethodType(self.predict, model)

    explainer = SmartExplainer()
    explainer.model = model

    case, classes = explainer.check_model()
    explainer._case = case
    explainer._classes = classes

    assert explainer._case == 'regression'
    assert explainer._classes is None
def __init__(self, *args, **kwargs):
    """
    Constructor - builds a SmartExplainer in memory, compiles it on a
    two-row dataset with a LinearRegression model and filters it with
    max_contrib=2, then delegates to the parent constructor

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the parent class constructor.
    """
    x_data = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
    predictions = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
    contrib_frame = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])

    self.xpl = SmartExplainer()
    self.xpl.compile(
        contributions=contrib_frame,
        x=x_data,
        y_pred=predictions,
        model=LinearRegression(),
    )
    self.xpl.filter(max_contrib=2)

    super(TestWebappSettings, self).__init__(*args, **kwargs)
def test_check_features_name_3(self):
    """
    Unit test check features name 3
    A technical column name must be converted to its key in columns_dict
    """
    explainer = SmartExplainer()
    explainer.columns_dict = {0: 'tech_0', 1: 'tech_1', 2: 'tech_2'}
    # Reverse mapping: column name -> column key
    explainer.inv_columns_dict = {
        name: key for key, name in explainer.columns_dict.items()
    }

    result = explainer.check_features_name(['tech_2'])

    np.testing.assert_array_equal(result, [2])
def test_check_label_name_5(self):
    """
    Unit test check label name 5

    check_label_name must raise with the "not found" message when the
    label is absent from the label dictionary for origin 'value'.
    """
    label_dict = {1: 'Age', 2: 'Education'}
    xpl = SmartExplainer(label_dict=label_dict)
    xpl.inv_label_dict = {v: k for k, v in xpl.label_dict.items()}
    xpl._classes = [1, 2]

    label = 'Absent'
    origin = 'value'
    # Built from the inputs so message and arguments cannot drift apart;
    # evaluates to "Label (Absent) not found for origin (value)".
    expected_msg = f"Label ({label}) not found for origin ({origin})"

    self.assertRaisesWithMessage(
        expected_msg,
        xpl.check_label_name,
        label=label,
        origin=origin,
    )
def test_check_label_name_2(self):
    """
    Unit test check label name 2
    Without a label dictionary, looking up class code 1 by 'code' must
    give position 0, code 1 and value 1
    """
    explainer = SmartExplainer(label_dict=None)
    explainer._classes = [1, 2]

    found_num, found_code, found_value = explainer.check_label_name(1, 'code')

    assert 0 == found_num
    assert 1 == found_code
    assert 1 == found_value
class TestWebappSettings(unittest.TestCase):
    """
    Unit tests for webapp settings class
    Checks that the webapp settings remain valid whether the user input is valid or not
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor - builds a SmartExplainer in memory, compiles it on a
        two-row dataset with a LinearRegression model and filters it with
        max_contrib=2, then delegates to the unittest.TestCase constructor

        Parameters
        ----------
        *args, **kwargs
            Forwarded unchanged to unittest.TestCase.
        """
        self.xpl = SmartExplainer()
        contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
        y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
        dataframe_x = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
        self.xpl.compile(
            contributions=contributions,
            x=dataframe_x,
            y_pred=y_pred,
            model=LinearRegression(),
        )
        self.xpl.filter(max_contrib=2)
        super().__init__(*args, **kwargs)

    def test_settings_types(self):
        """
        Test settings dtypes (must be ints)
        """
        settings = {'rows': None,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}

        self.xpl.init_app(settings)
        assert all(
            isinstance(attrib, int)
            for attrib in self.xpl.smartapp.settings.values()
        )

    def test_settings_values(self):
        """
        Test settings values (must be >0)
        """
        settings = {'rows': 0,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}

        self.xpl.init_app(settings)
        assert all(
            attrib > 0
            for attrib in self.xpl.smartapp.settings.values()
        )

    def test_settings_keys(self):
        """
        Test settings keys : the expected keys must be in the final settings dict, whatever the user input is
        """
        settings = {'oui': 1,
                    1: 2,
                    "a": []}

        self.xpl.init_app(settings)
        expected_keys = {'rows', 'points', 'violin', 'features'}
        # Set equality checks both directions: no user key leaked into the
        # settings and no expected key is missing. A one-way subset check
        # would pass on an empty settings dict.
        assert set(self.xpl.smartapp.settings) == expected_keys
def test_compile_1(self):
    """
    Unit test compile 1
    checking compile with a CatBoostClassifier fitted on a 0/1 target:
    the case must be classification and the classes [0, 1]
    """
    frame = pd.DataFrame(range(0, 21), columns=['id'])
    frame['y'] = frame['id'].apply(lambda value: 1 if value < 10 else 0)
    n_rows = frame.shape[0]
    frame['x1'] = np.random.randint(1, 123, n_rows)
    frame['x2'] = np.random.randint(1, 3, n_rows)
    frame = frame.set_index('id')

    classifier = cb.CatBoostClassifier(n_estimators=1).fit(frame[['x1', 'x2']], frame['y'])

    explainer = SmartExplainer()
    explainer.compile(model=classifier, x=frame[['x1', 'x2']])

    assert explainer._case == "classification"
    self.assertListEqual(explainer._classes, [0, 1])
def test_adapt_contributions_2(self):
    """
    Unit test 2 adapt_contributions
    Regression case with one contribution pd.DataFrame: the output must
    equal the input frame
    """
    contribution_cols = [
        'contribution_0',
        'contribution_1',
        'contribution_2',
        'contribution_3',
    ]
    contrib_frame = pd.DataFrame(
        [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
        columns=contribution_cols,
        index=[0, 1, 2],
    )

    explainer = SmartExplainer()
    explainer._case = "regression"

    adapted = explainer.adapt_contributions(contrib_frame)

    pd.testing.assert_frame_equal(contrib_frame, adapted)
def test_save_load(self):
    """
    Test save + load methods

    Saves an explainer to a pickle, loads it into a fresh SmartExplainer
    and checks both objects carry the same attribute names. The pickle
    file is removed even when an assertion fails.
    """
    pkl_file, xpl = init_sme_to_pickle_test()
    xpl.save(pkl_file)
    try:
        xpl2 = SmartExplainer()
        xpl2.load(pkl_file)

        attrib_xpl = list(xpl.__dict__)
        attrib_xpl2 = list(xpl2.__dict__)

        # Attribute names must match in both directions.
        assert all(attrib in attrib_xpl2 for attrib in attrib_xpl)
        assert all(attrib2 in attrib_xpl for attrib2 in attrib_xpl2)
    finally:
        # Always clean up so a failing run does not leave the file behind
        os.remove(pkl_file)
def test_load_1(self):
    """
    Unit test load 1
    An explainer loaded from data/xpl_to_load.pkl must carry the same
    attribute names as the one built by init_sme_to_pickle_test
    """
    # Only the explainer is needed; the returned file name is not used
    _, reference = init_sme_to_pickle_test()
    loaded = SmartExplainer()

    root_dir = Path(path.abspath(__file__)).parent.parent.parent
    loaded.load(path.join(root_dir, 'data/xpl_to_load.pkl'))

    reference_attrs = [name for name in reference.__dict__.keys()]
    loaded_attrs = [name for name in loaded.__dict__.keys()]

    assert all(name in loaded_attrs for name in reference_attrs)
    assert all(name in reference_attrs for name in loaded_attrs)
def test_check_label_name_3(self):
    """
    Unit test check label name 3
    Looking up position 0 by 'num' must give position 0, code 1 and the
    label 'Age'
    """
    explainer = SmartExplainer(label_dict={1: 'Age', 2: 'Education'})
    # Reverse mapping: label -> class code
    explainer.inv_label_dict = {
        label: code for code, label in explainer.label_dict.items()
    }
    explainer._classes = [1, 2]

    found_num, found_code, found_value = explainer.check_label_name(0, 'num')

    assert 0 == found_num
    assert 1 == found_code
    assert 'Age' == found_value
def compute_contributions(self, x, model, methods, preprocessing):
    """
    Compute contributions for each requested method

    One SmartExplainer is created and compiled once per entry of
    ``methods``, the entry being passed to compile as ``backend``.

    Parameters
    ----------
    x : pandas.DataFrame
        Prediction set, forwarded to SmartExplainer.compile as ``x``.
    model : model object
        Model forwarded to SmartExplainer.compile as ``model``.
    methods : list
        Methods used to calculate contributions (for instance "shap" or
        "acv"); each entry is passed to compile as ``backend``.
    preprocessing : category_encoders, ColumnTransformer, list, dict
        Preprocessing forwarded to SmartExplainer.compile.
        --> Differents types of preprocessing are available:

        - A single category_encoders (OrdinalEncoder/OnehotEncoder/BaseNEncoder/BinaryEncoder/TargetEncoder)
        - A single ColumnTransformer with scikit-learn encoding or category_encoders transformers
        - A list with multiple category_encoders with optional (dict, list of dict)
        - A list with a single ColumnTransformer with optional (dict, list of dict)
        - A dict
        - A list of dict

    Returns
    -------
    contributions : dict
        Dict whose keys are method names and values are the corresponding
        contributions. For a classification case with exactly two
        classes, the value is the element at index 1 of the explainer's
        contributions.

    Raises
    ------
    AssertionError
        If the explainer reports a classification case with more than
        two classes.
    """
    results = {}
    explainer = SmartExplainer()

    for method_name in methods:
        explainer.compile(x=x, model=model, preprocessing=preprocessing, backend=method_name)

        # Anything other than classification: keep contributions as they are
        if explainer._case != "classification":
            results[method_name] = explainer.contributions
            continue

        n_classes = len(explainer._classes)
        if n_classes > 2:
            raise AssertionError("Multi-class classification is not supported")

        if n_classes == 2:
            results[method_name] = explainer.contributions[1]
        else:
            results[method_name] = explainer.contributions

    return results