Example #1
    def test_load_smartpredictor_1(self):
        """
        Unit test load_smartpredictor 1
        """
        xpl = SmartExplainer(features_dict={})
        y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
        dataframe_x = pd.DataFrame([[1, 2, 4], [1, 2, 3]])
        clf = cb.CatBoostClassifier(n_estimators=1).fit(dataframe_x, y_pred)
        xpl.compile(x=dataframe_x, y_pred=y_pred, model=clf)
        predictor = xpl.to_smartpredictor()

        current = Path(path.abspath(__file__)).parent.parent.parent
        if sys.version[0:3] == '3.7':
            pkl_file = path.join(current, 'data/predictor_to_load_37.pkl')
        elif sys.version[0:3] == '3.6':
            pkl_file = path.join(current, 'data/predictor_to_load_36.pkl')
        else:
            # Reference pickles only exist for Python 3.6 and 3.7; skip instead of
            # failing later with an unbound pkl_file.
            self.skipTest('No reference predictor pickle for this Python version')

        predictor2 = load_smartpredictor(pkl_file)

        attrib_predictor = [element for element in predictor.__dict__.keys()]
        attrib_predictor2 = [element for element in predictor2.__dict__.keys()]

        assert all(attrib in attrib_predictor2 for attrib in attrib_predictor)
        assert all(attrib2 in attrib_predictor
                   for attrib2 in attrib_predictor2)
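
A minimal sketch of the save / load round trip that produces such a reference pickle, reusing the predictor built above and assuming SmartPredictor exposes a save(path) method as in the shapash tutorials (the file name is illustrative):

# Sketch only: persist the predictor, then reload it and compare attributes.
predictor.save('predictor_to_load.pkl')
predictor_reloaded = load_smartpredictor('predictor_to_load.pkl')
assert set(predictor_reloaded.__dict__) == set(predictor.__dict__)
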
    def test_display_dataset_analysis_3(self, mock_correlation_matrix):
        """
        Test that dataset analysis runs without error when the dataset only
        contains categorical features (no correlation matrix is computed)
        """
        df = self.df.copy()
        df['x1'] = 'a'
        df['x2'] = df['x2'].astype(str)
        encoder = OrdinalEncoder(
            cols=['x1', 'x2'],
            handle_unknown='ignore',
            return_df=True).fit(df)

        df = encoder.transform(df)

        clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
        xpl = SmartExplainer()
        xpl.compile(model=clf, x=df[['x1', 'x2']])
        report = ProjectReport(
            explainer=xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=df[['x1', 'x2']],
        )

        report.display_dataset_analysis()

        self.assertEqual(mock_correlation_matrix.call_count, 0)
def compile_shapash_model(x, model):
    xpl = SmartExplainer()
    xpl.compile(
        x=x,
        model=model,
    )
    return xpl
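
A minimal usage sketch for this helper; the toy frame and RandomForestRegressor below are illustrative assumptions, not part of the original snippet:

import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Toy regression data: any model supported by shapash's default backend works here.
x = pd.DataFrame({'x1': [1, 2, 3, 4], 'x2': [10, 20, 30, 40]})
y = [0.5, 1.0, 1.5, 2.0]
model = RandomForestRegressor(n_estimators=5).fit(x, y)

xpl = compile_shapash_model(x, model)  # returns a compiled SmartExplainer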
Example #4
 def test_compile_0(self, mock_apply_preprocessing, mock_choose_state):
     """
     Unit test compile
     Parameters
     ----------
     mock_apply_preprocessing : unittest.mock.MagicMock
         Mocked apply_preprocessing method, injected by a patch decorator.
     mock_choose_state : unittest.mock.MagicMock
         Mocked choose_state method, injected by a patch decorator.
     """
     xpl = SmartExplainer()
     mock_state = Mock()
     mock_choose_state.return_value = mock_state
     model = lambda: None
     model.predict = types.MethodType(self.predict, model)
     mock_state.rank_contributions.return_value = 1, 2, 3
     contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
     mock_state.validate_contributions.return_value = contributions
     mock_apply_preprocessing.return_value = contributions
     x_pred = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
     xpl.compile(x=x_pred, model=model, contributions=contributions)
     assert hasattr(xpl, 'state')
     assert xpl.state == mock_state
     assert hasattr(xpl, 'x_pred')
     pd.testing.assert_frame_equal(xpl.x_pred, x_pred)
     assert hasattr(xpl, 'contributions')
     pd.testing.assert_frame_equal(xpl.contributions, contributions)
     mock_choose_state.assert_called()
     mock_state.validate_contributions.assert_called()
     mock_apply_preprocessing.assert_called()
     mock_state.rank_contributions.assert_called()
     assert xpl._case == "regression"
    def test_compile_3(self):
        """
        Unit test compile 3
        Checking that compile raises a ValueError when both an explainer and
        precomputed contributions are supplied
        """
        df = pd.DataFrame(range(0, 21), columns=['id'])
        df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = np.random.randint(1, 3, df.shape[0])
        df = df.set_index('id')
        clf = cb.CatBoostClassifier(n_estimators=1).fit(
            df[['x1', 'x2']], df['y'])
        clf_explainer = shap.TreeExplainer(clf)

        contrib = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
                               columns=[
                                   'contribution_0', 'contribution_1',
                                   'contribution_2', 'contribution_3'
                               ],
                               index=[0, 1, 2])

        xpl = SmartExplainer()
        with self.assertRaises(ValueError):
            xpl.compile(model=clf,
                        x=df[['x1', 'x2']],
                        explainer=clf_explainer,
                        contributions=contrib)
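
For contrast, a hedged sketch of a call that should be accepted: pass either an explainer or precomputed contributions, never both. It reuses df and clf from the test above and assumes shapash accepts one contributions DataFrame per class for a binary classifier, as in the validate_contributions example further down (the values are random placeholders):

# Sketch only: precomputed contributions aligned with x, no explainer argument.
contrib_class_1 = pd.DataFrame(np.random.rand(df.shape[0], 2),
                               columns=['x1', 'x2'], index=df.index)
contributions = [-contrib_class_1, contrib_class_1]  # one frame per class
xpl = SmartExplainer()
xpl.compile(model=clf, x=df[['x1', 'x2']], contributions=contributions)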
Example #6
 def test_check_y_pred_4(self):
     """
     Unit test check y pred 4
     """
     xpl = SmartExplainer()
     xpl.y_pred = [0, 1]
     self.assertRaises(AttributeError, xpl.check_y_pred)
Example #7
 def test_check_label_dict_2(self):
     """
     Unit test check label dict 2
     """
     xpl = SmartExplainer()
     xpl._case = 'regression'
     xpl.check_label_dict()
Example #8
 def test_check_features_dict_1(self):
     """
     Unit test check features dict 1
     """
     xpl = SmartExplainer(features_dict={'Age': 'Age (Years Old)'})
     xpl.columns_dict = {0: 'Age', 1: 'Education', 2: 'Sex'}
     xpl.check_features_dict()
     assert xpl.features_dict['Age'] == 'Age (Years Old)'
     assert xpl.features_dict['Education'] == 'Education'
Example #9
 def test_add_3(self):
     """
     Unit test add 3
     """
     xpl = SmartExplainer()
     xpl.columns_dict = {0: 'Age', 1: 'Education', 2: 'Sex'}
     xpl.add(features_dict={'Age': 'Age (Years Old)'})
     assert xpl.features_dict['Age'] == 'Age (Years Old)'
     assert xpl.features_dict['Education'] == 'Education'
Example #10
 def test_apply_preprocessing_1(self):
     """
     Unit test apply preprocessing 1
     """
     xpl = SmartExplainer()
     contributions = [1, 2, 3]
     output = xpl.apply_preprocessing(contributions)
     expected = contributions
     self.assertListEqual(output, expected)
 def test_check_features_desc_1(self):
     """
     Unit test check features desc 1
     """
     xpl = SmartExplainer()
     xpl.x_pred = pd.DataFrame([[0.12, 0, 13, 1], [0.13, 1, 14, 1],
                                [0.14, 1, 15, 1], [0.15, np.NaN, 13, 1]],
                               columns=['col1', 'col2', 'col3', 'col4'])
     expected = {'col1': 4, 'col2': 2, 'col3': 3, 'col4': 1}
     assert xpl.check_features_desc() == expected
Example #12
 def test_apply_preprocessing_2(self):
     """
     Unit test apply preprocessing 2
     """
     xpl = SmartExplainer()
     xpl.state = Mock()
     preprocessing = Mock()
     contributions = [1, 2, 3]
     xpl.apply_preprocessing(contributions, preprocessing)
     xpl.state.inverse_transform_contributions.assert_called()
 def test_validate_contributions_1(self):
     """
     Unit test validate contributions 1
     """
     xpl = SmartExplainer()
     contributions = [
         np.array([[2, 1], [8, 4]]),
         np.array([[5, 5], [0, 0]])
     ]
     model = Mock()
     model._classes = np.array([1, 3])
     model.predict = types.MethodType(self.predict, model)
     model.predict_proba = types.MethodType(self.predict_proba, model)
     xpl.model = model
     xpl._case = "classification"
     xpl._classes = list(model._classes)
     xpl.state = xpl.choose_state(contributions)
     xpl.x_init = pd.DataFrame([[1, 2], [3, 4]],
                               columns=['Col1', 'Col2'],
                               index=['Id1', 'Id2'])
     expected_output = [
         pd.DataFrame([[2, 1], [8, 4]],
                      columns=['Col1', 'Col2'],
                      index=['Id1', 'Id2']),
         pd.DataFrame([[5, 5], [0, 0]],
                      columns=['Col1', 'Col2'],
                      index=['Id1', 'Id2'])
     ]
     output = xpl.validate_contributions(contributions)
     assert len(expected_output) == len(output)
     test_list = [
         pd.testing.assert_frame_equal(e, m)
         for e, m in zip(expected_output, output)
     ]
     assert all(x is None for x in test_list)
Example #14
 def test_compute_features_import_2(self):
     """
     Unit test compute_features_import 2
     Checking classification case
     """
     xpl = SmartExplainer()
     contrib1 = pd.DataFrame(
         [[1, 2, 3, 4],
          [5, 6, 7, 8],
          [9, 10, 11, 12]],
         columns=['contribution_0', 'contribution_1', 'contribution_2', 'contribution_3'],
         index=[0, 1, 2]
     )
     contrib2 = pd.DataFrame(
         [[13, 14, 15, 16],
          [17, 18, 19, 20],
          [21, 22, 23, 24]],
         columns=['contribution_0', 'contribution_1', 'contribution_2', 'contribution_3'],
         index=[0, 1, 2]
     )
     contributions = [contrib1, contrib2]
     xpl.features_imp = None
     xpl.contributions = contributions
     xpl.state = xpl.choose_state(contributions)
     xpl._case = "classification"
     xpl.compute_features_import()
     expect1 = contrib1.abs().sum().sort_values(ascending=True)
     expect1 = expect1 / expect1.sum()
     expect2 = contrib2.abs().sum().sort_values(ascending=True)
     expect2 = expect2 / expect2.sum()
     assert expect1.equals(xpl.features_imp[0])
     assert expect2.equals(xpl.features_imp[1])
Example #15
 def test_choose_state_2(self, mock_multi_decorator):
     """
     Unit test choose state 2
     Parameters
     ----------
     mock_multi_decorator : unittest.mock.MagicMock
         Mocked MultiDecorator state, injected by a patch decorator.
     """
     xpl = SmartExplainer()
     xpl.choose_state([1, 2, 3])
     mock_multi_decorator.assert_called()
Example #16
 def test_check_features_name_4(self):
     """
     Unit test check features name 4
     """
     xpl = SmartExplainer()
     xpl.columns_dict = None
     xpl.features_dict = None
     feature_list = [1, 2, 4]
     output = xpl.check_features_name(feature_list)
     expected_output = feature_list
     np.testing.assert_array_equal(output, expected_output)
Example #17
 def setUp(self):
     df = pd.DataFrame(range(0, 21), columns=['id'])
     df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
     df['x1'] = np.random.randint(1, 123, df.shape[0])
     df['x2'] = np.random.randint(1, 3, df.shape[0])
     df = df.set_index('id')
     clf = cb.CatBoostClassifier(n_estimators=1).fit(
         df[['x1', 'x2']], df['y'])
     self.xpl = SmartExplainer()
     self.xpl.compile(model=clf, x=df[['x1', 'x2']])
     self.df = df
Example #18
 def test_choose_state_1(self, mock_smart_state):
     """
     Unit test choose state 1
     Parameters
     ----------
     mock_smart_state : unittest.mock.MagicMock
         Mocked SmartState, injected by a patch decorator.
     """
     xpl = SmartExplainer()
     xpl.choose_state('contributions')
     mock_smart_state.assert_called()
Example #19
 def test_check_model_1(self):
     """
     Unit test check model 1
     """
     model = lambda: None
     model.predict = types.MethodType(self.predict, model)
     xpl = SmartExplainer()
     xpl.model = model
     xpl._case, xpl._classes = xpl.check_model()
     assert xpl._case == 'regression'
     assert xpl._classes is None
Example #20
 def __init__(self, *args, **kwargs):
     """
     Constructor - builds a small compiled SmartExplainer for the webapp settings tests
     """
     self.xpl = SmartExplainer()
     contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
     y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
     dataframe_x = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
     self.xpl.compile(contributions=contributions, x=dataframe_x, y_pred=y_pred, model=LinearRegression())
     self.xpl.filter(max_contrib=2)
     super(TestWebappSettings, self).__init__(*args, **kwargs)
Example #21
 def test_check_features_name_3(self):
     """
     Unit test check features name 3
     """
     xpl = SmartExplainer()
     xpl.columns_dict = {0: 'tech_0', 1: 'tech_1', 2: 'tech_2'}
     xpl.inv_columns_dict = {v: k for k, v in xpl.columns_dict.items()}
     feature_list = ['tech_2']
     output = xpl.check_features_name(feature_list)
     expected_output = [2]
     np.testing.assert_array_equal(output, expected_output)
Example #22
 def test_check_label_name_5(self):
     """
     Unit test check label name 5
     """
     label_dict = {1: 'Age', 2: 'Education'}
     xpl = SmartExplainer(label_dict=label_dict)
     xpl.inv_label_dict = {v: k for k, v in xpl.label_dict.items()}
     xpl._classes = [1, 2]
     label = 'Absent'
     origin = 'value'
     expected_msg = f"Label ({label}) not found for origin ({origin})"
     self.assertRaisesWithMessage(expected_msg, xpl.check_label_name, **{'label': label, 'origin': origin})
Example #23
 def test_check_label_name_2(self):
     """
     Unit test check label name 2
     """
     xpl = SmartExplainer(label_dict=None)
     xpl._classes = [1, 2]
     entry = 1
     expected_num = 0
     expected_code = 1
     expected_value = 1
     label_num, label_code, label_value = xpl.check_label_name(entry, 'code')
     assert expected_num == label_num
     assert expected_code == label_code
     assert expected_value == label_value
Example #24
class TestWebappSettings(unittest.TestCase):
    """
    Unit tests for webapp settings class
    Checks that the webapp settings remain valid whether the user input is valid or not
    """
    def __init__(self, *args, **kwargs):
        """
        Constructor - builds a small compiled SmartExplainer for the webapp settings tests
        """
        self.xpl = SmartExplainer()
        contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
        y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
        dataframe_x = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
        self.xpl.compile(contributions=contributions, x=dataframe_x, y_pred=y_pred, model=LinearRegression())
        self.xpl.filter(max_contrib=2)
        super(TestWebappSettings, self).__init__(*args, **kwargs)

    def test_settings_types(self):
        """
        Test settings dtypes (must be ints)
        """
        settings = {'rows': None,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}
        self.xpl.init_app(settings)
        print(self.xpl.smartapp.settings)
        assert all(isinstance(attrib, int) for k, attrib in self.xpl.smartapp.settings.items())

    def test_settings_values(self):
        """
        Test settings values (must be >0)
        """
        settings = {'rows': 0,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}
        self.xpl.init_app(settings)
        assert all(attrib > 0 for k, attrib in self.xpl.smartapp.settings.items())

    def test_settings_keys(self):
        """
        Test settings keys : the expected keys must be in the final settings dict, whatever the user input is
        """
        settings = {'oui': 1,
                    1: 2,
                    "a": []}
        self.xpl.init_app(settings)
        assert all(k in ['rows', 'points', 'violin', 'features'] for k in self.xpl.smartapp.settings)
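
Taken together, these tests pin down the invariant that init_app always leaves smartapp.settings with only the four known keys and strictly positive integer values. A hedged illustration with well-formed input (the numbers are made up):

# Sketch only: xpl is a compiled SmartExplainer, as in __init__ above.
settings = {'rows': 1000, 'points': 2000, 'violin': 10, 'features': 20}
xpl.init_app(settings)
assert set(xpl.smartapp.settings) <= {'rows', 'points', 'violin', 'features'}
assert all(isinstance(v, int) and v > 0 for v in xpl.smartapp.settings.values())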
Example #25
 def test_compile_1(self):
     """
     Unit test compile 1
     Checking that compile detects a binary classification model
     """
     df = pd.DataFrame(range(0, 21), columns=['id'])
     df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
     df['x1'] = np.random.randint(1, 123, df.shape[0])
     df['x2'] = np.random.randint(1, 3, df.shape[0])
     df = df.set_index('id')
     clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
     xpl = SmartExplainer()
     xpl.compile(model=clf, x=df[['x1', 'x2']])
     assert xpl._case == "classification"
     self.assertListEqual(xpl._classes, [0, 1])
 def test_adapt_contributions_2(self):
     """
     Unit test 2 adapt_contributions
     Regression case with a single contributions pd.DataFrame
     """
     xpl = SmartExplainer()
     contrib = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
                            columns=[
                                'contribution_0', 'contribution_1',
                                'contribution_2', 'contribution_3'
                            ],
                            index=[0, 1, 2])
     xpl._case = "regression"
     output = xpl.adapt_contributions(contrib)
     pd.testing.assert_frame_equal(contrib, output)
Example #27
    def test_save_load(self):
        """
        Test save + load methods
        """
        pkl_file, xpl = init_sme_to_pickle_test()
        xpl.save(pkl_file)
        xpl2 = SmartExplainer()
        xpl2.load(pkl_file)

        attrib_xpl = [element for element in xpl.__dict__.keys()]
        attrib_xpl2 = [element for element in xpl2.__dict__.keys()]

        assert all(attrib in attrib_xpl2 for attrib in attrib_xpl)
        assert all(attrib2 in attrib_xpl for attrib2 in attrib_xpl2)
        os.remove(pkl_file)
Example #28
    def test_load_1(self):
        """
        Unit test load 1
        """
        temp, xpl = init_sme_to_pickle_test()
        xpl2 = SmartExplainer()
        current = Path(path.abspath(__file__)).parent.parent.parent
        pkl_file = path.join(current, 'data/xpl_to_load.pkl')
        xpl2.load(pkl_file)

        attrib_xpl = [element for element in xpl.__dict__.keys()]
        attrib_xpl2 = [element for element in xpl2.__dict__.keys()]

        assert all(attrib in attrib_xpl2 for attrib in attrib_xpl)
        assert all(attrib2 in attrib_xpl for attrib2 in attrib_xpl2)
Example #29
 def test_check_label_name_3(self):
     """
     Unit test check label name 3
     """
     label_dict = {1: 'Age', 2: 'Education'}
     xpl = SmartExplainer(label_dict=label_dict)
     xpl.inv_label_dict = {v: k for k, v in xpl.label_dict.items()}
     xpl._classes = [1, 2]
     entry = 0
     expected_num = 0
     expected_code = 1
     expected_value = 'Age'
     label_num, label_code, label_value = xpl.check_label_name(entry, 'num')
     assert expected_num == label_num
     assert expected_code == label_code
     assert expected_value == label_value
Example #30
    def compute_contributions(self, x, model, methods, preprocessing):
        """
        Compute contributions based on specified methods

        Parameters
        ----------
        x : pandas.DataFrame
            Prediction set.
            IMPORTANT: x must be the preprocessed dataset, i.e. the one the model can be
            applied to; when a preprocessing object is given, Shapash uses it to recover
            the raw values seen by the end user.
        model : model object
            Model used for the consistency check. The model object can also be used by
            some methods to compute predict and predict_proba values.
        methods : list
            List of methods used to compute the contributions (e.g. ["shap", "acv"]).
        preprocessing : category_encoders, ColumnTransformer, list, dict
                --> Different types of preprocessing are available:

                - A single category_encoders (OrdinalEncoder/OnehotEncoder/BaseNEncoder/BinaryEncoder/TargetEncoder)
                - A single ColumnTransformer with scikit-learn encoding or category_encoders transformers
                - A list with multiple category_encoders with optional (dict, list of dict)
                - A list with a single ColumnTransformer with optional (dict, list of dict)
                - A dict
                - A list of dict

        Returns
        -------
        contributions : dict
            Dict whose keys are method names and values are the corresponding contributions
        """
        contributions = {}
        xpl = SmartExplainer()

        for backend in methods:
            xpl.compile(x=x,
                        model=model,
                        preprocessing=preprocessing,
                        backend=backend)
            if xpl._case == "classification" and len(xpl._classes) == 2:
                contributions[backend] = xpl.contributions[1]
            elif xpl._case == "classification" and len(xpl._classes) > 2:
                raise AssertionError(
                    "Multi-class classification is not supported")
            else:
                contributions[backend] = xpl.contributions

        return contributions
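
A minimal usage sketch for this method. The enclosing class is not shown here, so consistency below stands in for a hypothetical instance of it; the encoder, model and toy data are also illustrative assumptions:

import catboost as cb
import pandas as pd
from category_encoders import OrdinalEncoder

# Toy binary-classification data with one categorical column.
df = pd.DataFrame({'x1': ['a', 'b', 'a', 'b'], 'x2': [1, 2, 3, 4], 'y': [0, 1, 0, 1]})
encoder = OrdinalEncoder(cols=['x1']).fit(df[['x1', 'x2']])
x_encoded = encoder.transform(df[['x1', 'x2']])
clf = cb.CatBoostClassifier(n_estimators=1).fit(x_encoded, df['y'])

# consistency: hypothetical instance of the class defining compute_contributions.
contributions = consistency.compute_contributions(
    x=x_encoded,                 # preprocessed dataset, as required by the docstring
    model=clf,
    methods=['shap'],            # 'acv' could be added if the acv package is installed
    preprocessing=encoder,
)
print(contributions['shap'].shape)  # one contributions DataFrame per method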