    @patch('shapash.report.project_report.generate_correlation_matrix_fig')
    def test_display_dataset_analysis_3(self, mock_correlation_matrix):
        """
        Test that dataset analysis runs without error when there are only
        categorical features (no correlation matrix should be computed)
        """
        df = self.df.copy()
        df['x1'] = 'a'
        df['x2'] = df['x2'].astype(str)
        encoder = OrdinalEncoder(
            cols=['x1', 'x2'],
            handle_unknown='ignore',
            return_df=True).fit(df)

        df = encoder.transform(df)

        clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
        xpl = SmartExplainer()
        xpl.compile(model=clf, x=df[['x1', 'x2']])
        report = ProjectReport(
            explainer=xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=df[['x1', 'x2']],
        )

        report.display_dataset_analysis()

        self.assertEqual(mock_correlation_matrix.call_count, 0)

    def test_compile_3(self):
        """
        Unit test compile 3
        checking that compile raises a ValueError when both a custom
        explainer and precomputed contributions are provided
        """
        df = pd.DataFrame(range(0, 21), columns=['id'])
        df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = np.random.randint(1, 3, df.shape[0])
        df = df.set_index('id')
        clf = cb.CatBoostClassifier(n_estimators=1).fit(
            df[['x1', 'x2']], df['y'])
        clf_explainer = shap.TreeExplainer(clf)

        contrib = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
                               columns=[
                                   'contribution_0', 'contribution_1',
                                   'contribution_2', 'contribution_3'
                               ],
                               index=[0, 1, 2])

        xpl = SmartExplainer()
        with self.assertRaises(ValueError):
            xpl.compile(model=clf,
                        x=df[['x1', 'x2']],
                        explainer=clf_explainer,
                        contributions=contrib)

def compile_shapash_model(x, model):
    """Compile a SmartExplainer on x with the given model and return it."""
    xpl = SmartExplainer()
    xpl.compile(
        x=x,
        model=model,
    )
    return xpl
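For context, a minimal sketch of how this helper might be called (the data and model below are illustrative, not taken from the original tests):

# Hypothetical usage of compile_shapash_model; assumes shapash, pandas and
# scikit-learn are installed. Data and model are illustrative only.
import pandas as pd
from sklearn.linear_model import LinearRegression

x = pd.DataFrame({'x1': [1, 2, 3], 'x2': [4, 5, 6]})
y = pd.Series([10.0, 20.0, 30.0])
model = LinearRegression().fit(x, y)

xpl = compile_shapash_model(x, model)
print(xpl._case)  # expected: "regression" for a regressor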
Example #4
    def test_to_smartpredictor_1(self):
        """
        Unit test 1 of to_smartpredictor
        """
        df = pd.DataFrame(range(0, 5), columns=['id'])
        df['y'] = df['id'].apply(lambda x: 1 if x < 2 else 0)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = ["S", "M", "S", "D", "M"]
        df = df.set_index('id')
        encoder = ce.OrdinalEncoder(cols=["x2"], handle_unknown="None")
        encoder_fitted = encoder.fit(df[["x1", "x2"]])
        df_encoded = encoder_fitted.transform(df[["x1", "x2"]])
        clf = cb.CatBoostClassifier(n_estimators=1).fit(df_encoded[['x1', 'x2']], df['y'])

        postprocessing = {"x2": {
            "type": "transcoding",
            "rule": {"S": "single", "M": "married", "D": "divorced"}}}
        xpl = SmartExplainer(features_dict={"x1": "age", "x2": "family_situation"})

        xpl.compile(model=clf,
                    x=df_encoded[['x1', 'x2']],
                    preprocessing=encoder_fitted,
                    postprocessing=postprocessing)
        predictor_1 = xpl.to_smartpredictor()

        xpl.mask_params = {
            'features_to_hide': None,
            'threshold': None,
            'positive': True,
            'max_contrib': 1
        }

        predictor_2 = xpl.to_smartpredictor()

        assert hasattr(predictor_1, 'model')
        assert hasattr(predictor_1, 'explainer')
        assert hasattr(predictor_1, 'features_dict')
        assert hasattr(predictor_1, 'label_dict')
        assert hasattr(predictor_1, '_case')
        assert hasattr(predictor_1, '_classes')
        assert hasattr(predictor_1, 'columns_dict')
        assert hasattr(predictor_1, 'features_types')
        assert hasattr(predictor_1, 'preprocessing')
        assert hasattr(predictor_1, 'postprocessing')
        assert hasattr(predictor_1, 'mask_params')
        assert hasattr(predictor_2, 'mask_params')

        assert predictor_1.model == xpl.model
        assert predictor_1.explainer == xpl.explainer
        assert predictor_1.features_dict == xpl.features_dict
        assert predictor_1.label_dict == xpl.label_dict
        assert predictor_1._case == xpl._case
        assert predictor_1._classes == xpl._classes
        assert predictor_1.columns_dict == xpl.columns_dict
        assert predictor_1.preprocessing == xpl.preprocessing
        assert predictor_1.postprocessing == xpl.postprocessing
        assert all(predictor_1.features_types[feature] == str(xpl.x_pred[feature].dtypes)
                   for feature in xpl.x_pred.columns)

        assert predictor_2.mask_params == xpl.mask_params
Example #5
 def test_compile_0(self, mock_apply_preprocessing, mock_choose_state):
     """
     Unit test compile
     Parameters
     ----------
      mock_apply_preprocessing : Mock
          Mock of the apply_preprocessing function
      mock_choose_state : Mock
          Mock of the choose_state function
     """
     xpl = SmartExplainer()
     mock_state = Mock()
     mock_choose_state.return_value = mock_state
     model = lambda: None
     model.predict = types.MethodType(self.predict, model)
     mock_state.rank_contributions.return_value = 1, 2, 3
     contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
     mock_state.validate_contributions.return_value = contributions
     mock_apply_preprocessing.return_value = contributions
     x_pred = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
     xpl.compile(x=x_pred, model=model, contributions=contributions)
     assert hasattr(xpl, 'state')
     assert xpl.state == mock_state
     assert hasattr(xpl, 'x_pred')
     pd.testing.assert_frame_equal(xpl.x_pred, x_pred)
     assert hasattr(xpl, 'contributions')
     pd.testing.assert_frame_equal(xpl.contributions, contributions)
     mock_choose_state.assert_called()
     mock_state.validate_contributions.assert_called()
     mock_apply_preprocessing.assert_called()
     mock_state.rank_contributions.assert_called()
     assert xpl._case == "regression"
Example #6
    def test_load_smartpredictor_1(self):
        """
        Unit test load_smartpredictor 1
        """
        xpl = SmartExplainer(features_dict={})
        y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
        dataframe_x = pd.DataFrame([[1, 2, 4], [1, 2, 3]])
        clf = cb.CatBoostClassifier(n_estimators=1).fit(dataframe_x, y_pred)
        xpl.compile(x=dataframe_x, y_pred=y_pred, model=clf)
        predictor = xpl.to_smartpredictor()

        current = Path(path.abspath(__file__)).parent.parent.parent
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        if python_version == '3.7':
            pkl_file = path.join(current, 'data/predictor_to_load_37.pkl')
        elif python_version == '3.6':
            pkl_file = path.join(current, 'data/predictor_to_load_36.pkl')
        else:
            self.skipTest('no reference pickle available for this Python version')

        predictor2 = load_smartpredictor(pkl_file)

        attrib_predictor = list(predictor.__dict__)
        attrib_predictor2 = list(predictor2.__dict__)

        assert all(attrib in attrib_predictor2 for attrib in attrib_predictor)
        assert all(attrib2 in attrib_predictor
                   for attrib2 in attrib_predictor2)
Example #7
 def test_to_pandas_2(self):
     """
     Unit test to_pandas :
     test to_pandas method in classification case with
     predict_proba output and column_dict attribute
     """
     xpl = SmartExplainer()
     contrib = pd.DataFrame(
         [[0.32230754, 0.1550689, 0.10183475, 0.05471339],
          [-0.58547512, -0.37050409, -0.07249285, 0.00171975],
          [-0.48666675, 0.25507156, -0.16968889, 0.0757443]],
         index=[0, 1, 2])
     model = lambda: None
     model._classes = np.array([1, 3])
     model.predict = types.MethodType(self.predict, model)
     model.predict_proba = types.MethodType(self.predict_proba, model)
     x = pd.DataFrame(
         [[3., 1., 22., 1.], [1., 2., 38., 2.], [3., 2., 26., 1.]],
         index=[0, 1, 2])
     pred = pd.DataFrame([3, 1, 1], columns=['pred'], index=[0, 1, 2])
     xpl.compile(contributions=contrib, x=x, model=model, y_pred=pred)
     xpl.columns_dict = {0: 'Pclass', 1: 'Sex', 2: 'Age', 3: 'Embarked'}
     xpl.features_dict = {
         'Pclass': 'Pclass',
         'Sex': 'Sex',
         'Age': 'Age',
         'Embarked': 'Embarked'
     }
     output = xpl.to_pandas(max_contrib=3, positive=True, proba=True)
      expected = pd.DataFrame(
          [[3, 0.8, 'Pclass', 3.0, 0.32230754, 'Sex', 1.0, 0.1550689,
            'Age', 22.0, 0.10183475],
           [1, 0.3, 'Pclass', 1.0, 0.58547512, 'Sex', 2.0, 0.37050409,
            'Age', 38.0, 0.07249285],
           [1, 0.4, 'Pclass', 3.0, 0.48666675, 'Age', 26.0, 0.16968889,
            np.nan, np.nan, np.nan]],
          columns=['pred', 'proba', 'feature_1', 'value_1', 'contribution_1',
                   'feature_2', 'value_2', 'contribution_2',
                   'feature_3', 'value_3', 'contribution_3'],
          index=[0, 1, 2],
          dtype=object)
     expected['pred'] = expected['pred'].astype(int)
     expected['proba'] = expected['proba'].astype(float)
      pd.testing.assert_frame_equal(expected, output)  # raises if the frames differ
Example #8
    def test_predict_2(self):
        """
        Test predict method 2
        """
        xpl = SmartExplainer()
        X = pd.DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
        y_true = pd.DataFrame(data=np.array([1, 2, 3]), columns=['pred'])
        model = LinearRegression().fit(X, y_true)

        xpl.compile(x=X, model=model)
        xpl.predict()

        pd.testing.assert_frame_equal(xpl.y_pred, y_true, check_dtype=False)
Example #9
    def test_predict_1(self):
        """
        Test predict method 1
        """
        xpl = SmartExplainer()
        X = pd.DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
        y_true = pd.DataFrame(data=np.array([1, 2, 3]), columns=['pred'])
        y_false = pd.DataFrame(data=np.array([1, 2, 4]), columns=['pred'])
        model = LinearRegression().fit(X, y_true)

        xpl.compile(x=X, y_pred=y_false, model=model)
        xpl.predict()  # y_false should be replaced by predictions which are equal to y_true

        pd.testing.assert_frame_equal(xpl.y_pred, y_true, check_dtype=False)
Example #10
class TestWebappSettings(unittest.TestCase):
    """
    Unit tests for webapp settings class
    Checks that the webapp settings remain valid whether the user input is valid or not
    """
    def __init__(self, *args, **kwargs):
        """
        Constructor - loads a SmartExplainer object from the appropriate pickle
        """
        self.xpl = SmartExplainer()
        contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
        y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
        dataframe_x = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
        self.xpl.compile(contributions=contributions, x=dataframe_x, y_pred=y_pred, model=LinearRegression())
        self.xpl.filter(max_contrib=2)
        super(TestWebappSettings, self).__init__(*args, **kwargs)

    def test_settings_types(self):
        """
        Test settings dtypes (must be ints)
        """
        settings = {'rows': None,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}
        self.xpl.init_app(settings)
        print(self.xpl.smartapp.settings)
        assert all(isinstance(attrib, int) for attrib in self.xpl.smartapp.settings.values())

    def test_settings_values(self):
        """
        Test settings values (must be >0)
        """
        settings = {'rows': 0,
                    'points': 5200.4,
                    'violin': -1,
                    'features': "oui"}
        self.xpl.init_app(settings)
        assert all(attrib > 0 for attrib in self.xpl.smartapp.settings.values())

    def test_settings_keys(self):
        """
        Test settings keys : the expected keys must be in the final settings dict, whatever the user input is
        """
        settings = {'oui': 1,
                    1: 2,
                    "a": []}
        self.xpl.init_app(settings)
        assert all(k in ['rows', 'points', 'violin', 'features'] for k in self.xpl.smartapp.settings)
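As a rough usage sketch (assuming a compiled SmartExplainer named xpl, mirroring the constructor above), valid settings pass through init_app while the invalid entries exercised by these tests are dropped or coerced:

# Hedged sketch of init_app settings handling, based on what the tests above
# assert: the final settings are positive ints under the expected keys.
settings = {'rows': 100, 'points': 500, 'violin': 10, 'features': 20}
xpl.init_app(settings)        # xpl: a compiled SmartExplainer (illustrative)
print(xpl.smartapp.settings)  # e.g. {'rows': 100, 'points': 500, 'violin': 10, 'features': 20}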
Example #11
 def test_compile_1(self):
     """
     Unit test compile 1
     checking that the classification case and the classes are correctly inferred
     """
     df = pd.DataFrame(range(0, 21), columns=['id'])
     df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
     df['x1'] = np.random.randint(1, 123, df.shape[0])
     df['x2'] = np.random.randint(1, 3, df.shape[0])
     df = df.set_index('id')
     clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
     xpl = SmartExplainer()
     xpl.compile(model=clf, x=df[['x1', 'x2']])
     assert xpl._case == "classification"
     self.assertListEqual(xpl._classes, [0, 1])
Example #12
    def test_run_app_1(self, mock_get_host_name, mock_custom_thread, mock_smartapp):
        """
        Test that when y_pred is not given, y_pred is automatically computed.
        """
        xpl = SmartExplainer()

        X = pd.DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
        contributions = pd.DataFrame([[0.1, -0.2, 0.3], [0.1, -0.2, 0.3], [0.1, -0.2, 0.3]])
        y_true = pd.DataFrame(data=np.array([1, 2, 3]), columns=['pred'])
        model = LinearRegression().fit(X, y_true)

        xpl.compile(contributions=contributions, x=X, model=model)
        xpl.run_app()

        assert xpl.y_pred is not None
Example #13
    def compute_contributions(self, x, model, methods, preprocessing):
        """
        Compute contributions based on specified methods

        Parameters
        ----------
        x : pandas.DataFrame
            Prediction set.
            IMPORTANT: x must be the preprocessed (model-ready) dataset so that
            Shapash can apply the model to it; the raw values seen by the end
            user are recovered through the preprocessing object.
        model : model object
            Model used for consistency checks. The model object can also be used
            by some methods to compute predict and predict_proba values.
        methods : list
            List of methods used to compute the contributions, by default ["shap", "acv"]
        preprocessing : category_encoders, ColumnTransformer, list, dict
            Different types of preprocessing are available:

            - A single category_encoders encoder (OrdinalEncoder/OnehotEncoder/BaseNEncoder/BinaryEncoder/TargetEncoder)
            - A single ColumnTransformer with scikit-learn encoding or category_encoders transformers
            - A list of multiple category_encoders, with optional (dict, list of dict)
            - A list with a single ColumnTransformer, with optional (dict, list of dict)
            - A dict
            - A list of dict

        Returns
        -------
        contributions : dict
            Dict whose keys are method names and values are the corresponding contributions
        """
        contributions = {}
        xpl = SmartExplainer()

        for backend in methods:
            xpl.compile(x=x,
                        model=model,
                        preprocessing=preprocessing,
                        backend=backend)
            if xpl._case == "classification" and len(xpl._classes) == 2:
                contributions[backend] = xpl.contributions[1]
            elif xpl._case == "classification" and len(xpl._classes) > 2:
                raise AssertionError(
                    "Multi-class classification is not supported")
            else:
                contributions[backend] = xpl.contributions

        return contributions
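A short, hedged usage sketch of compute_contributions, reusing the binary-classification setup from the surrounding tests; the data are illustrative, and self is passed as None since this snippet treats the method as a plain function:

# Illustrative call; assumes catboost, numpy, pandas and shapash are installed.
import numpy as np
import pandas as pd
import catboost as cb

df = pd.DataFrame({
    "y": np.random.randint(2, size=50),
    "a": np.random.rand(50),
    "b": np.random.rand(50),
})
clf = cb.CatBoostClassifier(n_estimators=1).fit(df[["a", "b"]], df["y"])

contribs = compute_contributions(None, x=df[["a", "b"]], model=clf,
                                 methods=["shap"], preprocessing=None)
print(contribs["shap"].shape)  # one contribution per row and per feature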
Example #14
    def test_get_interaction_values_1(self):
        df = pd.DataFrame({
            "y": np.random.randint(2, size=50),
            "a": np.random.rand(50),
            "b": np.random.rand(50),
        })

        clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['a', 'b']], df['y'])

        xpl = SmartExplainer()
        xpl.compile(x=df.drop('y', axis=1), model=clf)

        shap_interaction_values = xpl.get_interaction_values(n_samples_max=10)
        assert shap_interaction_values.shape[0] == 10

        shap_interaction_values = xpl.get_interaction_values()
        assert shap_interaction_values.shape[0] == df.shape[0]
Example #15
def init_sme_to_pickle_test():
    """
    Init sme to pickle test
    TODO: Docstring
    Returns
    -------
    [type]
        [description]
    """
    current = Path(path.abspath(__file__)).parent.parent.parent
    pkl_file = path.join(current, 'data/xpl.pkl')
    xpl = SmartExplainer()
    contributions = pd.DataFrame([[-0.1, 0.2, -0.3], [0.1, -0.2, 0.3]])
    y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
    dataframe_x = pd.DataFrame([[1, 2, 3], [1, 2, 3]])
    xpl.compile(contributions=contributions, x=dataframe_x, y_pred=y_pred, model=LinearRegression())
    xpl.filter(max_contrib=2)
    return pkl_file, xpl
Example #16
def init_sme_to_pickle_test():
    """
    Init sme to pickle test
    TODO: Docstring
    Returns
    -------
    [type]
        [description]
    """
    current = Path(path.abspath(__file__)).parent.parent.parent
    pkl_file = path.join(current, 'data/predictor.pkl')
    xpl = SmartExplainer(features_dict={})
    y_pred = pd.DataFrame(data=np.array([1, 2]), columns=['pred'])
    dataframe_x = pd.DataFrame([[1, 2, 4], [1, 2, 3]])
    clf = cb.CatBoostClassifier(n_estimators=1).fit(dataframe_x, y_pred)
    xpl.compile(x=dataframe_x, y_pred=y_pred, model=clf)
    predictor = xpl.to_smartpredictor()
    return pkl_file, predictor
Example #17
 def test_display_model_explainability_2(self):
     """
     Tests multiclass case
     """
     df = pd.DataFrame(range(0, 21), columns=['id'])
     df['y'] = df['id'].apply(
         lambda x: 0 if x < 5 else 1 if (5 <= x < 10) else 2 if (10 <= x < 15) else 3)
     df['x1'] = np.random.randint(1, 123, df.shape[0])
     df['x2'] = np.random.randint(1, 3, df.shape[0])
     df = df.set_index('id')
     clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
     xpl = SmartExplainer()
     xpl.compile(model=clf, x=df[['x1', 'x2']])
     report = ProjectReport(
         explainer=xpl,
         project_info_file=os.path.join(current_path, '../../data/metadata.yaml')
     )
     report.display_model_explainability()
Example #18
    def test_compile_2(self):
        """
        Unit test compile 2
        checking the preprocessing and postprocessing attributes set by the compile method
        """
        df = pd.DataFrame(range(0, 5), columns=['id'])
        df['y'] = df['id'].apply(lambda x: 1 if x < 2 else 0)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = ["S", "M", "S", "D", "M"]
        df = df.set_index('id')
        encoder = ce.OrdinalEncoder(cols=["x2"], handle_unknown="None")
        encoder_fitted = encoder.fit(df)
        df_encoded = encoder_fitted.transform(df)
        output = df[["x1", "x2"]].copy()
        output["x2"] = ["single", "married", "single", "divorced", "married"]
        clf = cb.CatBoostClassifier(n_estimators=1).fit(df_encoded[['x1', 'x2']], df_encoded['y'])

        postprocessing_1 = {"x2": {
            "type": "transcoding",
            "rule": {"S": "single", "M": "married", "D": "divorced"}}}
        postprocessing_2 = {
            "family_situation": {
                "type": "transcoding",
                "rule": {"S": "single", "M": "married", "D": "divorced"}}}

        xpl_postprocessing1 = SmartExplainer()
        xpl_postprocessing2 = SmartExplainer(features_dict={"x1": "age",
                                                            "x2": "family_situation"}
                                             )
        xpl_postprocessing3 = SmartExplainer()

        xpl_postprocessing1.compile(model=clf,
                                    x=df_encoded[['x1', 'x2']],
                                    preprocessing=encoder_fitted,
                                    postprocessing=postprocessing_1)
        xpl_postprocessing2.compile(model=clf,
                                    x=df_encoded[['x1', 'x2']],
                                    preprocessing=encoder_fitted,
                                    postprocessing=postprocessing_2)
        xpl_postprocessing3.compile(model=clf,
                                    x=df_encoded[['x1', 'x2']],
                                    preprocessing=None,
                                    postprocessing=None)

        assert hasattr(xpl_postprocessing1, "preprocessing")
        assert hasattr(xpl_postprocessing1, "postprocessing")
        assert hasattr(xpl_postprocessing2, "preprocessing")
        assert hasattr(xpl_postprocessing2, "postprocessing")
        assert hasattr(xpl_postprocessing3, "preprocessing")
        assert hasattr(xpl_postprocessing3, "postprocessing")
        pd.testing.assert_frame_equal(xpl_postprocessing1.x_pred, output)
        pd.testing.assert_frame_equal(xpl_postprocessing2.x_pred, output)
        assert xpl_postprocessing1.preprocessing == encoder_fitted
        assert xpl_postprocessing2.preprocessing == encoder_fitted
        assert xpl_postprocessing1.postprocessing == postprocessing_1
        assert xpl_postprocessing2.postprocessing == postprocessing_1
Example #19
categorical_features = [
    col for col in X_df.columns if X_df[col].dtype == 'object'
]
encoder = OrdinalEncoder(cols=categorical_features,
                         handle_unknown='ignore',
                         return_df=True).fit(X_df)
X_df = encoder.transform(X_df)

Xtrain, Xtest, ytrain, ytest = train_test_split(X_df,
                                                y_df,
                                                train_size=0.75,
                                                random_state=1)

regressor = LGBMRegressor(n_estimators=200).fit(Xtrain, ytrain)

y_pred = pd.DataFrame(regressor.predict(Xtest),
                      columns=['pred'],
                      index=Xtest.index)

xpl = SmartExplainer(features_dict=house_dict)

xpl.compile(x=Xtest,
            model=regressor,
            preprocessing=encoder,
            y_pred=y_pred,
            title_story='House Prices - Lightgbm Regressor')

xpl.init_app()
app = xpl.smartapp.app

if __name__ == "__main__":
    app.run_server(debug=False, host="0.0.0.0", port=8080)
Example #20
house_df.head()

categorical_features = [
    col for col in X_df.columns if X_df[col].dtype == 'object'
]
encoder = OrdinalEncoder(cols=categorical_features,
                         handle_unknown='ignore',
                         return_df=True).fit(X_df)
X_df = encoder.transform(X_df)

Xtrain, Xtest, ytrain, ytest = train_test_split(X_df,
                                                y_df,
                                                train_size=0.75,
                                                random_state=1)

regressor = LGBMRegressor(n_estimators=200).fit(Xtrain, ytrain)

y_pred = pd.DataFrame(regressor.predict(Xtest),
                      columns=['pred'],
                      index=Xtest.index)

xpl = SmartExplainer(features_dict=house_dict)

xpl.compile(x=Xtest, model=regressor, preprocessing=encoder, y_pred=y_pred)

xpl.init_app()
app = xpl.smartapp.app

if __name__ == "__main__":
    app.run_server(debug=False, host="0.0.0.0", port=8080)
Example #21
X_train, X_test, y_train, y_test = train_test_split(
    titanic_enc,
    y,
    test_size=0.2,
)

X_test_ini = X.loc[X_test.index, :]

df = titanic[features + y.columns.to_list()]
df = df.loc[X_test.index, :]
df.reset_index(level=0, inplace=True)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

xpl = SmartExplainer()
y_pred = pd.DataFrame(data=y_pred,
                      columns=y.columns.to_list(),
                      index=X_test.index)

xpl.compile(X_test,
            model,
            y_pred=y_pred,
            preprocessing=encoder,
            title_story=cases[CASE])

xpl.init_app()
app = xpl.smartapp.app

if __name__ == "__main__":
    app.run_server(debug=False, host="0.0.0.0", port=8080)
Example #22
class TestGeneration(unittest.TestCase):
    def setUp(self):
        df = pd.DataFrame(range(0, 21), columns=['id'])
        df['y'] = df['id'].apply(lambda x: 1 if x < 10 else 0)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = np.random.randint(1, 3, df.shape[0])
        df = df.set_index('id')
        clf = cb.CatBoostClassifier(n_estimators=1).fit(
            df[['x1', 'x2']], df['y'])
        self.xpl = SmartExplainer()
        self.xpl.compile(model=clf, x=df[['x1', 'x2']])
        self.df = df

    def test_execute_report_1(self):
        tmp_dir_path = tempfile.mkdtemp()

        execute_report(working_dir=tmp_dir_path,
                       explainer=self.xpl,
                       project_info_file=os.path.join(current_path,
                                                      '../data/metadata.yaml'),
                       config=None,
                       notebook_path=None)
        assert os.path.exists(
            os.path.join(tmp_dir_path, 'smart_explainer.pickle'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'base_report.ipynb'))

        shutil.rmtree(tmp_dir_path)

    def test_execute_report_2(self):
        tmp_dir_path = tempfile.mkdtemp()

        execute_report(working_dir=tmp_dir_path,
                       explainer=self.xpl,
                       project_info_file=os.path.join(current_path,
                                                      '../data/metadata.yaml'),
                       x_train=self.df[['x1', 'x2']],
                       config=None,
                       notebook_path=None)
        assert os.path.exists(os.path.join(tmp_dir_path, 'x_train.csv'))
        assert os.path.exists(
            os.path.join(tmp_dir_path, 'smart_explainer.pickle'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'base_report.ipynb'))

        shutil.rmtree(tmp_dir_path)

    def test_execute_report_3(self):
        tmp_dir_path = tempfile.mkdtemp()

        execute_report(working_dir=tmp_dir_path,
                       explainer=self.xpl,
                       project_info_file=os.path.join(current_path,
                                                      '../data/metadata.yaml'),
                       x_train=self.df[['x1', 'x2']],
                       y_test=self.df['y'],
                       config=None,
                       notebook_path=None)
        assert os.path.exists(os.path.join(tmp_dir_path, 'x_train.csv'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'y_test.csv'))
        assert os.path.exists(
            os.path.join(tmp_dir_path, 'smart_explainer.pickle'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'base_report.ipynb'))

        shutil.rmtree(tmp_dir_path)

    def test_execute_report_4(self):
        tmp_dir_path = tempfile.mkdtemp()

        execute_report(working_dir=tmp_dir_path,
                       explainer=self.xpl,
                       project_info_file=os.path.join(current_path,
                                                      '../data/metadata.yaml'),
                       x_train=self.df[['x1', 'x2']],
                       y_train=self.df['y'],
                       y_test=self.df['y'],
                       config=None,
                       notebook_path=None)
        assert os.path.exists(os.path.join(tmp_dir_path, 'x_train.csv'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'y_test.csv'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'y_train.csv'))
        assert os.path.exists(
            os.path.join(tmp_dir_path, 'smart_explainer.pickle'))
        assert os.path.exists(os.path.join(tmp_dir_path, 'base_report.ipynb'))

        shutil.rmtree(tmp_dir_path)

    def test_export_and_save_report_1(self):
        tmp_dir_path = tempfile.mkdtemp()

        execute_report(
            working_dir=tmp_dir_path,
            explainer=self.xpl,
            project_info_file=os.path.join(current_path,
                                           '../data/metadata.yaml'),
        )

        outfile = os.path.join(tmp_dir_path, 'report.html')
        export_and_save_report(working_dir=tmp_dir_path, output_file=outfile)
        assert os.path.exists(outfile)
        shutil.rmtree(tmp_dir_path)
Example #23
shap_interaction_values = shap.TreeExplainer(XGBoostModel).shap_interaction_values(X)
shap.summary_plot(shap_interaction_values, X)
# Choose the second feature yourself instead of letting shap pick it automatically
shap.dependence_plot(("Var1", "Var1"), shap_interaction_values, X)  # main effect of Var1 only
shap.dependence_plot(("Var1", "Var2"), shap_interaction_values, X)  # interaction between Var1 and Var2


### SHAPASH ###
# https://www.analyticsvidhya.com/blog/2021/04/shapash-python-library-to-make-machine-learning-interpretable/
# it allows you to quickly understand the machine learning model by using a simple webapp
# pip install shapash
from shapash.explainer.smart_explainer import SmartExplainer   # Import shapash module
# Initialize the SmartExplainer class, then call its compile method with the test set and the model
SE = SmartExplainer()
SE.compile(
    x=xtest,          # test set
    model=regressor,  # black-box model
)
# Run the webapp
app = SE.run_app(title_story='Concrete_Data')
# Kill the webapp
app.kill()
# Create a lighter SmartPredictor object for deployment and save it
predictor = SE.to_smartpredictor()
predictor.save('./predictor.pkl')
from shapash.utils.load_smartpredictor import load_smartpredictor
predictor_load = load_smartpredictor('./predictor.pkl')
predictor_load.add_input(x=x, ypred=y)
detailed_contributions = predictor_load.detail_contributions()
detailed_contributions.head()

Example #24
class TestProjectReport(unittest.TestCase):

    def setUp(self):
        self.df = pd.DataFrame(range(0, 21), columns=['id'])
        self.df['y'] = self.df['id'].apply(lambda x: 1 if x < 10 else 0)
        self.df['x1'] = np.random.randint(1, 123, self.df.shape[0])
        self.df['x2'] = np.random.randint(1, 3, self.df.shape[0])
        self.df = self.df.set_index('id')
        self.clf = cb.CatBoostClassifier(n_estimators=1).fit(self.df[['x1', 'x2']], self.df['y'])
        self.xpl = SmartExplainer()
        self.xpl.compile(model=self.clf, x=self.df[['x1', 'x2']])
        self.report1 = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
        )
        self.report2 = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
        )

    def test_init_1(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
        )
        for attr in expected_attrs:
            assert hasattr(report, attr)

    def test_init_2(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
        )
        for attr in expected_attrs:
            assert hasattr(report, attr)

    def test_init_3(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
            y_test=self.df['y']
        )
        for attr in expected_attrs:
            assert hasattr(report, attr)

    def test_init_4(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
            y_test=self.df['y'],
            config={}
        )
        for attr in expected_attrs:
            assert hasattr(report, attr)

    def test_init_5(self):
        ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
            y_test=self.df['y'],
            config={'metrics': [{'path': 'sklearn.metrics.mean_squared_error'}]}
        )

    def test_init_6(self):
        self.assertRaises(ValueError, ProjectReport,
            self.xpl,
            os.path.join(current_path, '../../data/metadata.yaml'),
            self.df[['x1', 'x2']],
            self.df['y'],
            {'metrics': ['sklearn.metrics.mean_squared_error']}
        )

    @patch('shapash.report.project_report.print_html')
    def test_display_title_description_1(self, mock_print_html):
        self.report1.display_title_description()
        mock_print_html.assert_called_once()

    @patch('shapash.report.project_report.print_html')
    def test_display_title_description_2(self, mock_print_html):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
            y_test=self.df['y'],
            config={'title_story': "My project report",
                    'title_description': """This document is a data science project report."""}
        )
        report.display_title_description()
        self.assertEqual(mock_print_html.call_count, 2)

    @patch('shapash.report.project_report.print_md')
    def test_display_general_information_1(self, mock_print_md):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml')
        )
        report.display_project_information()
        self.assertTrue(mock_print_md.called)

    @patch('shapash.report.project_report.print_md')
    def test_display_model_information_1(self, mock_print_md):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml')
        )
        report.display_model_analysis()
        self.assertTrue(mock_print_md.called)

    def test_display_dataset_analysis_1(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=self.df[['x1', 'x2']],
        )
        report.display_dataset_analysis()

    def test_display_dataset_analysis_2(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
        )
        report.display_dataset_analysis()

    @patch('shapash.report.project_report.generate_correlation_matrix_fig')
    def test_display_dataset_analysis_3(self, mock_correlation_matrix):
        """
        Test we don't have a problem when only categorical features
        """
        df = self.df.copy()
        df['x1'] = 'a'
        df['x2'] = df['x2'].astype(str)
        encoder = OrdinalEncoder(
            cols=['x1', 'x2'],
            handle_unknown='ignore',
            return_df=True).fit(df)

        df = encoder.transform(df)

        clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
        xpl = SmartExplainer()
        xpl.compile(model=clf, x=df[['x1', 'x2']])
        report = ProjectReport(
            explainer=xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            x_train=df[['x1', 'x2']],
        )

        report.display_dataset_analysis()

        self.assertEqual(mock_correlation_matrix.call_count, 0)

    def test_display_model_explainability_1(self):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
        )
        report.display_model_explainability()

    def test_display_model_explainability_2(self):
        """
        Tests multiclass case
        """
        df = pd.DataFrame(range(0, 21), columns=['id'])
        df['y'] = df['id'].apply(
            lambda x: 0 if x < 5 else 1 if (5 <= x < 10) else 2 if (10 <= x < 15) else 3)
        df['x1'] = np.random.randint(1, 123, df.shape[0])
        df['x2'] = np.random.randint(1, 3, df.shape[0])
        df = df.set_index('id')
        clf = cb.CatBoostClassifier(n_estimators=1).fit(df[['x1', 'x2']], df['y'])
        xpl = SmartExplainer()
        xpl.compile(model=clf, x=df[['x1', 'x2']])
        report = ProjectReport(
            explainer=xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml')
        )
        report.display_model_explainability()

    @patch('shapash.report.project_report.logging')
    def test_display_model_performance_1(self, mock_logging):
        """
        No y_test given
        """
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
        )
        report.display_model_performance()
        mock_logging.info.assert_called_once()

    @patch('shapash.report.project_report.logging')
    def test_display_model_performance_2(self, mock_logging):
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            y_test=self.df['y'],
            config=dict(metrics=[{'path': 'sklearn.metrics.mean_squared_error'}])
        )
        report.display_model_performance()
        self.assertEqual(mock_logging.info.call_count, 0)

    @patch('shapash.report.project_report.logging')
    def test_display_model_performance_3(self, mock_logging):
        """
        No metrics given in ProjectReport
        """
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            y_test=self.df['y'],
        )
        report.display_model_performance()
        mock_logging.info.assert_called_once()

    @patch('shapash.report.project_report.logging')
    def test_display_model_performance_4(self, mock_logging):
        """
        Test use of proba values.
        """
        report = ProjectReport(
            explainer=self.xpl,
            project_info_file=os.path.join(current_path, '../../data/metadata.yaml'),
            y_test=self.df['y'],
            config=dict(metrics=[{'path': 'sklearn.metrics.log_loss', 'use_proba_values': True}])
        )
        report.display_model_performance()
        self.assertEqual(mock_logging.info.call_count, 0)