def setUp(self):
        """Fit two titanic MLP pipelines and wrap them in labelled dalex explainers."""
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        # Numeric columns: median imputation, then standardization.
        num_cols = ['age', 'fare', 'sibsp', 'parch']
        num_pipe = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])

        # Categorical columns: constant imputation, then one-hot encoding.
        cat_cols = ['gender', 'class', 'embarked']
        cat_pipe = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ])

        preprocessor = ColumnTransformer(transformers=[
            ('num', num_pipe, num_cols),
            ('cat', cat_pipe, cat_cols),
        ])

        def make_clf(hidden_layers):
            # Shared shape of both models: preprocessing + MLP classifier.
            return Pipeline(steps=[
                ('preprocessor', preprocessor),
                ('classifier', MLPClassifier(hidden_layer_sizes=hidden_layers,
                                             max_iter=400, random_state=0)),
            ])

        clf = make_clf((20, 20))
        clf2 = make_clf((50, 100, 50))

        clf.fit(self.X, self.y)
        clf2.fit(self.X, self.y)

        self.exp = dx.Explainer(clf, self.X, self.y, label="model1", verbose=False)
        self.exp2 = dx.Explainer(clf2, self.X, self.y, verbose=False)
        self.exp3 = dx.Explainer(clf, self.X, self.y, label="model3", verbose=False)
# ---- Example #2 ----
    def test(self):
        """Explainer should construct with any combination of missing data / y,
        while model-level methods raise ValueError when their inputs are absent."""
        case1 = dx.Explainer(self.model, self.X, self.y, verbose=False)
        case2 = dx.Explainer(self.model, self.X, None, verbose=False)
        case3 = dx.Explainer(self.model, None, self.y, verbose=False)
        case4 = dx.Explainer(self.model, None, None, verbose=False)

        self.assertIsInstance(case1, dx.Explainer)
        self.assertIsInstance(case2, dx.Explainer)
        self.assertIsInstance(case3, dx.Explainer)
        self.assertIsInstance(case4, dx.Explainer)

        # Each of these methods needs data the explainer was built without.
        with self.assertRaises(ValueError):
            case2.model_performance()
        with self.assertRaises(ValueError):
            case3.model_parts()
        with self.assertRaises(ValueError):
            case4.model_profile()

        # Instance-level explanations only need the single observation,
        # so they work even without y.
        case5 = case2.predict_parts(self.X.iloc[[0]])
        case6 = case2.predict_profile(self.X.iloc[[0]])

        self.assertIsInstance(case5, dx.instance_level.BreakDown)
        self.assertIsInstance(case6, dx.instance_level.CeterisParibus)

        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # Trigger a warning: an invalid (non-callable) predict_function
            # should warn but still produce an Explainer.
            case5 = dx.Explainer(self.model, self.X, self.y, predict_function=1, verbose=False)
            assert issubclass(w[-1].category, UserWarning)

        self.assertIsInstance(case5, dx.Explainer)
# ---- Example #3 ----
    def test(self):
        """Explainer should construct with any combination of missing data / y,
        while model-level methods raise ValueError when their inputs are absent."""
        case1 = dx.Explainer(self.model, self.X, self.y, verbose=False)
        case2 = dx.Explainer(self.model, self.X, None, verbose=False)
        case3 = dx.Explainer(self.model, None, self.y, verbose=False)
        case4 = dx.Explainer(self.model, None, None, verbose=False)

        for case in (case1, case2, case3, case4):
            self.assertIsInstance(case, dx.Explainer)

        # Each method needs a piece of data its explainer was built without.
        for case, method_name in ((case2, 'model_performance'),
                                  (case3, 'model_parts'),
                                  (case4, 'model_profile')):
            with self.assertRaises(ValueError):
                getattr(case, method_name)()

        # Instance-level explanations work without y.
        observation = self.X.iloc[[0]]
        case5 = case2.predict_parts(observation)
        case6 = case2.predict_profile(observation)

        self.assertIsInstance(case5, dx.instance_level.BreakDown)
        self.assertIsInstance(case6, dx.instance_level.CeterisParibus)

        # An invalid predict_function still yields an Explainer object.
        case5 = dx.Explainer(self.model,
                             self.X,
                             self.y,
                             predict_function=1,
                             verbose=False)
        self.assertIsInstance(case5, dx.Explainer)
# ---- Example #4 ----
    def pdp_profile(self,
                    X,
                    y,
                    model_names=None,
                    features=None,
                    figsize=(8, 8)):
        """Plot partial-dependence (PDP) curves of every model for each feature.

        Parameters
        ----------
        X, y : data and target used to build the dalex explainers.
        model_names : optional list of model names; defaults to the columns of
            ``self.pdp_measures`` (first column excluded).
        features : optional list of feature names; defaults to
            ``self.pdp_measures.colname``.
        figsize : matplotlib figure size for each per-feature plot.
        """

        if model_names is None:
            model_names = self.pdp_measures.columns.tolist()[1:]

        if features is None:
            features = self.pdp_measures.colname.tolist()

        # The explainers do not depend on the feature being plotted, so build
        # them once here instead of re-creating them inside the feature loop
        # (the original rebuilt every explainer for every feature).
        base_explainer = dx.Explainer(self.base_model[1],
                                      X,
                                      y,
                                      label=self.base_model[0],
                                      verbose=False)
        model_explainers = [(model[0],
                             dx.Explainer(model[1],
                                          X,
                                          y,
                                          label=model[0],
                                          verbose=False))
                            for model in self.models]

        for fe in features:

            profile_base = base_explainer.model_profile(verbose=False,
                                                        variables=[fe])

            # First column holds the grid; the rest hold per-model responses.
            df = pd.DataFrame({
                'x': profile_base.result._x_,
                self.base_model[0]: profile_base.result._yhat_
            })

            del profile_base
            plt.subplots(figsize=figsize)

            for name, explainer in model_explainers:
                profile = explainer.model_profile(verbose=False, variables=[fe])

                df[name] = profile.result._yhat_

                del profile

            for col in df.columns[1:]:
                plt.plot(df['x'], df[col])

            plt.title("PDP curves for feature: " + fe)
            plt.legend(df.columns[1:])
            plt.xlabel(fe)

            plt.show()
# ---- Example #5 ----
    def test_errors(self):
        """Explainer should warn (not fail) when the predict function returns
        output of the wrong shape: 2-D, 3-D, or a 0-d scalar array."""

        from sklearn.ensemble import RandomForestRegressor

        data = dx.datasets.load_fifa()
        X = data.drop(columns=['nationality', 'value_eur']).iloc[1:100, :]
        y = data['value_eur'][1:100]

        model = RandomForestRegressor()
        model.fit(X, y)

        # Valid predictions reshaped to (n, 1) -- one dimension too many.
        def predict_function_return_2d(m, d):
            n_rows = d.shape[0]
            prediction = m.predict(d)
            return prediction.reshape((n_rows, 1))

        # Predictions reshaped to (n, 1, 1) -- two dimensions too many.
        def predict_function_return_3d(m, d):
            n_rows = d.shape[0]
            prediction = m.predict(d)
            return prediction.reshape((n_rows, 1, 1))

        # A 0-d array regardless of the input rows.
        def predict_function_return_one_element_array(m, d):
            return np.array(0.2)

        # NOTE(review): set globally (outside catch_warnings), so the "always"
        # filter stays active for all three blocks below and leaks out of this
        # test afterwards.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            dx.Explainer(model,
                         X,
                         y,
                         verbose=False,
                         model_type='regression',
                         predict_function=predict_function_return_2d)
            assert issubclass(w[-1].category, UserWarning)

        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            dx.Explainer(model,
                         X,
                         y,
                         verbose=False,
                         model_type='regression',
                         predict_function=predict_function_return_3d)
            assert issubclass(w[-1].category, UserWarning)

        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            dx.Explainer(
                model,
                X,
                y,
                verbose=False,
                model_type='regression',
                predict_function=predict_function_return_one_element_array)
            assert issubclass(w[-1].category, UserWarning)
    def setUp(self):
        """Fit two titanic MLP pipelines and declare the plot containers
        that the tests expect to be supported."""
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        # Numeric columns: median imputation followed by standardization.
        numeric_features = ['age', 'fare', 'sibsp', 'parch']
        numeric_transformer = Pipeline(
            steps=[('imputer', SimpleImputer(
                strategy='median')), ('scaler', StandardScaler())])

        # Categorical columns: constant imputation followed by one-hot encoding.
        categorical_features = ['gender', 'class', 'embarked']
        categorical_transformer = Pipeline(
            steps=[('imputer',
                    SimpleImputer(strategy='constant', fill_value='missing')
                    ), ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[('num', numeric_transformer, numeric_features),
                          ('cat', categorical_transformer,
                           categorical_features)])

        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier',
                               MLPClassifier(hidden_layer_sizes=(20, 20),
                                             max_iter=400,
                                             random_state=0))])
        clf2 = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier',
                                MLPClassifier(hidden_layer_sizes=(50, 100, 50),
                                              max_iter=400,
                                              random_state=0))])

        clf.fit(self.X, self.y)
        clf2.fit(self.X, self.y)

        self.exp = dx.Explainer(clf,
                                self.X,
                                self.y,
                                label="model1",
                                verbose=False)
        self.exp2 = dx.Explainer(clf2,
                                 self.X,
                                 self.y,
                                 label="model2",
                                 verbose=False)

        # These plot container types should be supported.
        self.reference_plots = [
            ROCContainer, ShapleyValuesContainer, BreakDownContainer,
            CeterisParibusContainer, FeatureImportanceContainer,
            PartialDependenceContainer, AccumulatedDependenceContainer,
            MetricsContainer
        ]
# ---- Example #7 ----
    def generate_breakdown_explainer(self, i):
        """Create a dalex break-down plot for test instance ``i`` and save it as SVG.

        Keras ``Sequential`` models named 'DNN' are re-wrapped in a
        scikit-learn-compatible classifier so dalex can use them; any other
        model is used as-is.

        Raises
        ------
        NotImplementedError
            For ``Sequential`` models that are not named 'DNN'. The original
            code only printed an empty string for 'RNN' and then crashed with
            a NameError because ``instc`` was never assigned.
        """
        model_name = type(self.model).__name__
        model = self.model
        if model_name == 'Sequential':
            X_test = pd.DataFrame.from_records(self.X_test)

            model_name = self.model.name
            if self.model.name == 'DNN':
                instc = self.X_test[i].reshape((1, -1))
                # NOTE(review): tf.keras.wrappers.scikit_learn is removed in
                # recent TF releases -- confirm the pinned TF version has it.
                dnn_clf = tf.keras.wrappers.scikit_learn.KerasClassifier(
                    dnn_model_create,
                    input=self.X_test.shape[1],
                    epochs=10,
                    verbose=False)
                dnn_clf._estimator_type = "classifier"
                dnn_clf.fit(self.X_train, self.y_train)
                model = dnn_clf
            else:
                # Fail loudly instead of falling through with `instc` unbound.
                raise NotImplementedError(
                    "break-down explanation is not supported for Sequential "
                    "model '%s'" % self.model.name)
        else:
            instc = self.X_test.iloc[i]
            X_test = self.X_test

        smart_grid_exp = dx.Explainer(
            model,
            X_test,
            self.y_test,
            label=("Smart Grid New England " + model_name +
                   " Pipeline on instance: " + str(i)))

        X_test.columns = self.feature_names
        instance = smart_grid_exp.predict_parts(instc, type='break_down')
        fig = instance.plot(max_vars=30, show=False)
        fig.write_image("explainer_outputs/Break_Down/" + model_name + "_" +
                        str(i) + "_" + self.grid + ".svg")
# ---- Example #8 ----
    def setUp(self):
        """Fit an MLP regression pipeline on a local titanic.csv and explain it."""
        data = pd.read_csv("titanic.csv", index_col=0).dropna()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        # Numeric columns: median imputation then scaling.
        numeric_features = ['age', 'fare', 'sibsp', 'parch']
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])

        # Categorical columns: constant imputation then one-hot encoding.
        categorical_features = ['gender', 'class', 'embarked', 'country']
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ])

        preprocessor = ColumnTransformer(transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
        ])

        regressor = MLPRegressor(hidden_layer_sizes=(150, 100, 50),
                                 max_iter=500, random_state=0)
        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', regressor)])

        clf.fit(self.X, self.y)

        self.exp = dx.Explainer(clf, self.X, self.y)
# ---- Example #9 ----
    def setUp(self):
        """Build the classification and regression fairness fixtures.

        Seeds numpy's RNG first so the synthetic regression data (and every
        test derived from it) is reproducible -- the original fixture drew
        different random data on every run.
        """
        np.random.seed(123)

        # Hand-crafted toy data for confusion-matrix level tests.
        self.protected = np.array(['a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'c'])
        self.y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0])
        self.y_pred = np.array([0.32, 0.43, 0.56, 0.67, 0.9, 0.67, 0.98, 0.1, 0.44, 1, 0.65, 0.55, 1])
        self.cutoff = {'a': 0.5, 'b': 0.4, 'c': 0.6}

        # classifier
        data = dx.datasets.load_german()

        X = data.drop(columns='risk')
        y = data.risk

        categorical_features = ['sex', 'job', 'housing', 'saving_accounts', "checking_account", 'purpose']
        categorical_transformer = Pipeline(steps=[
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[
                ('cat', categorical_transformer, categorical_features)])

        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))])
        clf.fit(X, y)

        self.exp = dx.Explainer(clf, X, y, verbose=False)
        # Protected attribute: sex combined with an under/over-25 age bucket.
        self.german_protected = data.sex + '_' + np.where(data.age < 25, 'young', 'old')

        self.mgf = self.exp.model_fairness(protected=self.german_protected,
                                           privileged='male_old',
                                           verbose=False)
        self.mgf2 = deepcopy(self.mgf)
        self.mgf.label = 'first'
        self.mgf2.label = 'second'

        # regressor: two synthetic features whose distribution differs by group.
        self.protected_reg = np.array([np.tile('A', 1000), np.tile('B', 1000)]).flatten()
        first = np.array([np.random.normal(100, 20, 1000), np.random.normal(50, 10, 1000)]).flatten()
        second = np.array([np.random.normal(60, 20, 1000), np.random.normal(60, 10, 1000)]).flatten()
        target = np.array([np.random.normal(10000, 2000, 1000), np.random.normal(8000, 1000, 1000)]).flatten()
        data2 = pd.DataFrame({'first': first, 'second': second})

        reg = DecisionTreeRegressor()
        reg.fit(data2, target)

        self.exp_reg = dx.Explainer(reg, data2, target)
        self.mgf_reg = self.exp_reg.model_fairness(self.protected_reg, 'A')
# ---- Example #10 ----
def ale_plot_uplift(model, X_train, Y_train, treatment_col):
    """Plot accumulated-local-effects (ALE) profiles of the model's uplift."""
    uplift_fn = lambda m, x: calc_uplift(m, x, treatment_col)
    explainer = dalex.Explainer(model,
                                X_train,
                                Y_train,
                                predict_function=uplift_fn)
    explainer.model_profile(type='accumulated').plot()
# ---- Example #11 ----
def pdp_plot_uplift(model, X_train, Y_train, treatment_col):
    """Plot partial-dependence (PDP) profiles of the model's uplift."""
    uplift_fn = lambda m, x: calc_uplift(m, x, treatment_col)
    explainer = dalex.Explainer(model,
                                X_train,
                                Y_train,
                                predict_function=uplift_fn)
    explainer.model_profile(type='partial').plot()
    def setUp(self):
        """Train a random forest on four numeric titanic features and explain it."""
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        feature_cols = ["age", "fare", "sibsp", "parch"]
        self.X = data.loc[:, feature_cols]
        self.y = data.survived

        forest = RandomForestClassifier(n_estimators=100)
        forest.fit(self.X, self.y)

        self.exp = dx.Explainer(forest, self.X, self.y, verbose=False)
# ---- Example #13 ----
    def setUp(self):
        """Build one explainer without and one with a feature_importances_ model."""
        data = dx.datasets.load_titanic()
        self.X = data.drop(columns=['survived', 'class', 'embarked'])
        self.y = data.survived
        self.X['gender'] = LabelEncoder().fit_transform(self.X.gender)

        # MLPClassifier has no feature_importances_ attribute.
        mlp = MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=400, random_state=0)
        mlp.fit(self.X, self.y)
        self.exp = dx.Explainer(mlp, self.X, self.y, verbose=False)

        data2 = dx.datasets.load_fifa()
        dropped = ["nationality", "overall", "potential", "value_eur", "wage_eur"]
        self.X2 = data2.drop(dropped, axis=1).iloc[0:2000, 0:10]
        self.y2 = data2['value_eur'].iloc[0:2000]

        # RandomForestRegressor exposes feature_importances_.
        forest = RandomForestRegressor(random_state=0)
        forest.fit(self.X2, self.y2)
        self.exp2 = dx.Explainer(forest, self.X2, self.y2, verbose=False)
def init_explainer(model,
                   data_x: pd.DataFrame,
                   exp_type: str = 'shap',
                   log_loss: bool = False,
                   verbose: bool = False):
    """Build an explainer object for either the SHAP or the DALEX library.

    Args:
        model: A tree-based model.
        data_x: Data matching the model's expected input.
        exp_type: Which explainer to build, one of {'shap', 'dalex'}.
        log_loss: If True, SHAP values explain the contribution to the
            classification error instead of the raw model output.
        verbose: Forwarded to the DALEX explainer.

    Returns:
        The explainer object of the requested type.

    Raises:
        ValueError: If ``exp_type`` is neither 'shap' nor 'dalex'.
    """

    if exp_type == 'shap':
        if log_loss:
            try:
                return shap.TreeExplainer(model,
                                          data_x,
                                          model_output='log_loss')
            except Exception:
                # Some models reject background data; fall back to the
                # data-free constructor. (Was a bare `except:`, which also
                # swallowed KeyboardInterrupt/SystemExit.)
                return shap.TreeExplainer(model, model_output='log_loss')
        else:
            try:
                return shap.TreeExplainer(model, data_x)
            except Exception:
                return shap.TreeExplainer(model)
    elif exp_type == 'dalex':
        # NOTE(review): the model's own predictions are passed as `y`, not
        # ground-truth labels -- confirm this is intentional.
        return dx.Explainer(model,
                            data_x,
                            model.predict(data_x),
                            verbose=verbose)
    else:
        raise ValueError('exp_type="shap" or exp_type="dalex"')
# ---- Example #15 ----
# User classification using transfer learning (translated from Korean).
# ResNet-100

from keras.datasets import cifar100, cifar10

from keras.applications import VGG16, VGG19, Xception, ResNet101
from keras.models import Sequential, Model, Input
from keras.layers import Dense, Conv2D, Flatten, BatchNormalization, Activation, MaxPooling2D, Dropout
from keras.optimizers import Adam

import dalex as dx
# NOTE(review): `clf`, `X` and `y` are not defined anywhere in this snippet --
# this Explainer line appears pasted in from an unrelated titanic example and
# will raise a NameError as written.
expl = dx.Explainer(clf, X, y, label="Titanic MLP Pipeline")

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale pixel values into [0, 1].
x_train = x_train.reshape(50000, 32, 32, 3).astype('float32') / 255.0
x_test = x_test.reshape(10000, 32, 32, 3).astype('float32') / 255.0

# Pre-trained ResNet101 backbone without its classification head.
resnet101 = ResNet101(input_shape=(32, 32, 3), include_top=False)

model = Sequential()
model.add(resnet101)
model.add(Flatten())
model.add(Dense(512))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(10, activation='softmax'))

model.summary()

# NOTE(review): the snippet is truncated here -- the compile() call is cut
# off mid-argument list.
model.compile(optimizer=Adam(2e-4),
# ---- Example #16 ----
class FairnessTest(unittest.TestCase):
    """Tests for dalex group-fairness utilities.

    NOTE(review): the class body (below the first two tests) fits a decision
    tree on the german-credit data at class-definition time, i.e. at import --
    heavyweight for a test module; consider moving it into setUpClass.
    """

    def test_ConfusionMatrix(self):
        """ConfusionMatrix should count tp/tn/fp/fn at the given cutoff and
        validate its inputs."""
        from dalex.fairness.group_fairness.utils import ConfusionMatrix
        y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        y_pred = np.array([0.32, 0.54, 0.56, 0.67, 0.34, 0.67, 0.98, 1])
        cutoff = 0.55
        cm = ConfusionMatrix(y_true, y_pred, cutoff)

        #  proper calculations
        self.assertEqual(cm.cutoff, 0.55)
        self.assertEqual(cm.tp, 3)
        self.assertEqual(cm.tn, 2)
        self.assertEqual(cm.fp, 2)
        self.assertEqual(cm.fn, 1)

        #  error assertions: mismatched lengths, then a cutoff outside [0, 1]
        y_true = y_true[:-1]
        with self.assertRaises(AssertionError):
            cm_ = ConfusionMatrix(y_true, y_pred, cutoff)
        y_true = np.append(y_true, 1)
        cutoff = 1.5
        with self.assertRaises(AssertionError):
            cm_ = ConfusionMatrix(y_true, y_pred, cutoff)

    # Shared toy fixture: three subgroups ('a', 'b', 'c') with per-group cutoffs.
    protected = np.array(['a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'c'])
    y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0])
    y_pred = np.array([0.32, 0.43, 0.56, 0.67, 0.9, 0.67, 0.98, 0.1, 0.44, 1, 0.65, 0.55, 1])
    cutoff = {'a': 0.5, 'b': 0.4, 'c': 0.6}

    def test_SubConfusionMatrix(self):
        """Per-subgroup confusion matrices with per-subgroup cutoffs."""
        from dalex.fairness.group_fairness.utils import SubgroupConfusionMatrix

        y_true = FairnessTest.y_true
        y_pred = FairnessTest.y_pred
        protected = FairnessTest.protected
        cutoff = FairnessTest.cutoff

        scf = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)

        #  proper calculations
        self.assertEqual(scf.sub_dict.get('a').tp, 2)
        self.assertEqual(scf.sub_dict.get('a').fp, 2)
        self.assertEqual(scf.sub_dict.get('a').fn, 0)
        self.assertEqual(scf.sub_dict.get('a').tn, 2)

        self.assertEqual(scf.sub_dict.get('b').tp, 2)
        self.assertEqual(scf.sub_dict.get('b').fp, 0)
        self.assertEqual(scf.sub_dict.get('b').fn, 1)
        self.assertEqual(scf.sub_dict.get('b').tn, 0)

        self.assertEqual(scf.sub_dict.get('c').tp, 2)
        self.assertEqual(scf.sub_dict.get('c').fp, 1)
        self.assertEqual(scf.sub_dict.get('c').fn, 0)
        self.assertEqual(scf.sub_dict.get('c').tn, 1)

        #  error assertions: mismatched lengths, then a list where a dict is required
        y_true = y_true[:-1]
        with self.assertRaises(AssertionError):
            cm_ = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)
        y_true = np.append(y_true, 1)
        cutoff = [0.1, 0.2, 0.4]  # list instead of dict
        with self.assertRaises(AssertionError):
            cm_ = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)

    def test_SubgroupConfusionMatrixMetrics(self):
        """Derived metrics (TPR/TNR/...) per subgroup; NaN when undefined."""
        # NOTE(review): SubgroupConfusionMatrix and SubgroupConfusionMatrixMetrics
        # are used without a local import here -- presumably imported at module
        # level; confirm in the full file.
        y_true = FairnessTest.y_true
        y_pred = FairnessTest.y_pred
        protected = FairnessTest.protected
        cutoff = FairnessTest.cutoff

        scf = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)
        scf_metrics = SubgroupConfusionMatrixMetrics(scf)
        scmm = scf_metrics.subgroup_confusion_matrix_metrics
        self.assertEqual(scmm.get('a').get('TPR'), 1)
        self.assertEqual(scmm.get('b').get('TPR'), 0.667)
        self.assertTrue(np.isnan(scmm.get('b').get('TNR')))

    def test_calculate_ratio(self):
        """calculate_ratio should equal metric(b)/metric(a) with inf/0 -> NaN."""
        y_true = FairnessTest.y_true
        y_pred = FairnessTest.y_pred
        protected = FairnessTest.protected
        cutoff = FairnessTest.cutoff

        scf = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)
        scf_metrics = SubgroupConfusionMatrixMetrics(scf)

        df_ratio = calculate_ratio(scf_metrics, 'a')

        b = list(scf_metrics.subgroup_confusion_matrix_metrics.get('b').values())
        a = list(scf_metrics.subgroup_confusion_matrix_metrics.get('a').values())

        # Recompute the expected ratios by hand, mirroring the library's
        # convention of mapping inf and 0 to NaN.
        ratio = np.array(b) / np.array(a)
        ratio[np.isinf(ratio)] = np.nan
        ratio[ratio == 0] = np.nan

        ratio_nonnan = ratio[np.isfinite(ratio)]
        df_ratio_nonnan = np.array(df_ratio.iloc[1, :][np.isfinite(df_ratio.iloc[1, :])])

        self.assertTrue(np.equal(ratio_nonnan, df_ratio_nonnan).all())

    def test_calculate_parity_loss(self):
        """Parity loss of a metric = sum of |log(ratio vs privileged group)|."""
        y_true = FairnessTest.y_true
        y_pred = FairnessTest.y_pred
        protected = FairnessTest.protected
        cutoff = FairnessTest.cutoff

        scf = SubgroupConfusionMatrix(y_true, y_pred, protected, cutoff)
        scf_metrics = SubgroupConfusionMatrixMetrics(scf)

        parity_loss = calculate_parity_loss(scf_metrics, "a")
        ratios = calculate_ratio(scf_metrics, "a")
        TPR_parity_loss = parity_loss.iloc[0]

        TPR_ratios = ratios.TPR / ratios.TPR[0]
        TPR_log =abs(np.log(TPR_ratios))

        self.assertEqual(TPR_log.sum(), TPR_parity_loss)

    # Class-level fixture: decision tree on the german-credit data, with a
    # protected attribute combining sex and an under/over-25 age bucket.
    data = dx.datasets.load_german()

    X = data.drop(columns='Risk')
    y = data.Risk

    categorical_features = ['Sex', 'Job', 'Housing', 'Saving_accounts', "Checking_account", 'Purpose']
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', categorical_transformer, categorical_features)])

    clf = Pipeline(steps=[('preprocessor', preprocessor),
                          ('classifier', DecisionTreeClassifier(max_depth=7, random_state=123))])
    clf.fit(X, y)

    exp = dx.Explainer(clf, X, y)
    german_protected = data.Sex + '_' + np.where(data.Age < 25, 'young', 'old')

    def test_GroupFairnessClassificationObject(self):
        """Direct construction of a GroupFairnessClassification object."""
        exp = FairnessTest.exp
        protected = FairnessTest.german_protected

        gfco = GroupFairnessClassification(y=exp.y,
                                           y_hat=exp.y_hat,
                                           protected=protected,
                                           privileged='male_old',
                                           verbose=False,
                                           label=exp.label)
        self.assertEqual(gfco.__class__.__name__, 'GroupFairnessClassification')

    def test_parameter_checks(self):
        """Invalid parameters raise ParameterCheckError; valid ones are coerced."""
        exp = FairnessTest.exp
        protected = FairnessTest.german_protected

        #  error handling: 2-D protected array
        wrong_protected = np.array([protected, protected])
        with self.assertRaises(ParameterCheckError):
            gfco = exp.model_fairness(protected=wrong_protected,
                                      privileged='male_old',
                                      verbose=False)

        # privileged value not present among the subgroups
        with self.assertRaises(ParameterCheckError):
            gfco = exp.model_fairness(protected=protected,
                                      privileged='not_existing',
                                      verbose=False)
        # y / y_hat / protected length mismatches
        with self.assertRaises(ParameterCheckError):
            gfco = GroupFairnessClassification(y=exp.y[:-1, ],
                                               y_hat=exp.y_hat,
                                               protected=protected,
                                               privileged='male_old',
                                               verbose=False,
                                               label=exp.label)
        with self.assertRaises(ParameterCheckError):
            gfco = GroupFairnessClassification(y=exp.y[:-1, ],
                                               y_hat=exp.y_hat[:-1, ],
                                               protected=protected,
                                               privileged='male_old',
                                               verbose=False,
                                               label=exp.label)

        # cutoff outside [0, 1]
        with self.assertRaises(ParameterCheckError):
            gfco = exp.model_fairness(protected=protected,
                                      privileged='male_old',
                                      cutoff=1.2,
                                      verbose=False)

        # cutoff of the wrong type
        with self.assertRaises(ParameterCheckError):
            gfco = exp.model_fairness(protected=protected,
                                      privileged='male_old',
                                      cutoff='not_int',
                                      verbose=False)

        # conversion check: a scalar cutoff is broadcast to all subgroups
        gfco = exp.model_fairness(protected=protected,
                                  privileged='male_old',
                                  cutoff=0.6,
                                  verbose=False)

        self.assertEqual(list(gfco.cutoff.values()), [0.6, 0.6, 0.6, 0.6])

        # a partial dict cutoff is filled in with the default 0.5
        gfco = exp.model_fairness(protected=protected,
                                  privileged='male_old',
                                  cutoff={'male_old': 0.9},
                                  verbose=False)
        self.assertEqual(gfco.cutoff, {'male_old': 0.9, 'female_old': 0.5, 'male_young': 0.5, 'female_young': 0.5})

        # non-string protected/privileged values are converted to strings
        np.random.seed(1)
        new_protected = np.random.choice(np.array([0, 1]), 1000)
        gfco = exp.model_fairness(protected=new_protected,
                                  privileged=1,
                                  verbose=False)

        self.assertEqual(gfco.privileged, '1')
        self.assertEqual(list(gfco.protected), list(new_protected.astype('U')))

        # a list protected attribute is converted to an ndarray
        gfco = exp.model_fairness(protected=list(protected),
                                  privileged='male_old',
                                  verbose=False)

        self.assertEqual(type(gfco.protected), np.ndarray)

    def test_model_group_fairness(self):
        """Explainer.model_fairness returns a GroupFairnessClassification."""
        exp = FairnessTest.exp
        protected = FairnessTest.german_protected
        mgf = exp.model_fairness(protected=protected,
                                 privileged='male_old',
                                 verbose=False)

        self.assertEqual(mgf.__class__.__name__, 'GroupFairnessClassification')

    def test_plot_fairness_check(self):
        """Default fairness-check plot: title, type, legend groups, and the
        errors raised when plotting incompatible fairness objects together."""
        import plotly.graph_objects as go
        exp = FairnessTest.exp
        protected = FairnessTest.german_protected
        mgf = exp.model_fairness(protected=protected,
                                 privileged='male_old',
                                 verbose=False)
        fig = mgf.plot(show=False)
        self.assertEqual(fig.layout.title.text, "Fairness Check")
        self.assertEqual(fig.__class__, go.Figure)

        mgf2 = deepcopy(mgf)
        mgf.label = 'first'
        mgf2.label = 'second'

        fig = mgf.plot(objects=[mgf2], show=False)
        self.assertEqual(fig.__class__, go.Figure)

        self.assertEqual(fig['data'][0]['legendgroup'], "first")
        self.assertEqual(fig['data'][5]['legendgroup'], "second")

        # test errors in plots: different privileged group ...
        with self.assertRaises(FairnessObjectsDifferenceError):
            mgf_wrong = exp.model_fairness(protected=protected,
                                           privileged='male_young',
                                           verbose=False
                                           )
            mgf.plot([mgf_wrong])

        # ... or a different underlying dataset length
        with self.assertRaises(FairnessObjectsDifferenceError):
            exp_wrong = deepcopy(exp)
            exp_wrong.y = exp_wrong.y[:-1]
            exp_wrong.y_hat = exp_wrong.y_hat[:-1]

            mgf_wrong = exp_wrong.model_fairness(protected=protected[:-1],
                                                 privileged='male_old',
                                                 verbose=False)
            mgf.plot([mgf_wrong])

    def test_plot_metric_scores(self):
        """Same checks as test_plot_fairness_check, for type='metric_scores'."""
        import plotly.graph_objects as go
        exp = FairnessTest.exp
        protected = FairnessTest.german_protected
        mgf = exp.model_fairness(protected=protected,
                                 privileged='male_old',
                                 verbose=False)
        fig = mgf.plot(show=False, type='metric_scores')
        self.assertEqual(fig.layout.title.text, "Metric Scores")
        self.assertEqual(fig.__class__, go.Figure)

        mgf2 = deepcopy(mgf)
        mgf.label = 'first'
        mgf2.label = 'second'

        fig = mgf.plot(objects=[mgf2], show=False, type='metric_scores')
        self.assertEqual(fig.__class__, go.Figure)

        self.assertEqual(fig['data'][0]['legendgroup'], "first")
        self.assertEqual(fig['data'][5]['legendgroup'], "second")

        # test errors in plots
        with self.assertRaises(FairnessObjectsDifferenceError):
            mgf_wrong = exp.model_fairness(protected=protected,
                                           privileged='male_young',
                                           verbose=False
                                           )
            mgf.plot([mgf_wrong], type='metric_scores')

        with self.assertRaises(FairnessObjectsDifferenceError):
            exp_wrong = deepcopy(exp)
            exp_wrong.y = exp_wrong.y[:-1]
            exp_wrong.y_hat = exp_wrong.y_hat[:-1]

            mgf_wrong = exp_wrong.model_fairness(protected=protected[:-1],
                                                 privileged='male_old',
                                                 verbose=False)
            mgf.plot([mgf_wrong], type='metric_scores')
# ---- Example #17 ----
import dalex as dx

# NOTE(review): Explainer is called with no arguments here; dalex's Explainer
# normally takes at least a model — confirm this snippet intentionally
# demonstrates the no-argument/error case rather than a working explainer.
dx.Explainer()
Example #18
0
from _train import data, X, y, pipeline, pipeline_for_encoded_data, encoded_X
import pandas as pd
import sklearn
import numpy as np
from lime import lime_tabular
import dalex as dx

# Use the imported preprocessing+estimator pipeline as the model to explain.
model = pipeline

# NOTE(review): no random_state is passed, so the 80/20 split (and therefore
# the fitted model) differs between runs — confirm reproducibility is not
# required for this example.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, train_size=0.80)
model.fit(X_train, y_train)

# create an explainer for the model:
# (fitted on X_train only, but explained against the full X/y dataset)
exp = dx.Explainer(model, X, y, label="Lung's Cancer MLP Pipeline")


# BreakDown and BreakDownInt methods
def BreakDown(number_of_observation):
    """Show a break-down plot for the test observation at the given index."""
    # iloc yields a Series; wrapping and transposing produces a 1-row frame,
    # which is the shape predict_parts expects.
    observation = pd.DataFrame(X_test.iloc[number_of_observation, :]).T
    explanation = exp.predict_parts(observation, type='break_down')
    explanation.plot()


def BreakDownI(number_of_observation):
    """Show a break-down-with-interactions plot for the indexed observation."""
    # Reshape the selected test row into a single-row DataFrame.
    observation = pd.DataFrame(X_test.iloc[number_of_observation, :]).T
    explanation = exp.predict_parts(observation,
                                    type='break_down_interactions')
    explanation.plot()
Example #19
0
                               X_valid,
                               Y_valid,
                               treatment_col,
                               just_get_model=True)


def calc_uplift_filled(model, x, treatment_col=11):
    """Estimate per-row uplift: treated prediction minus control prediction.

    Parameters
    ----------
    model : object
        Fitted model, forwarded unchanged to ``predict_treatment``.
    x : array-like
        Feature matrix; converted once to ``np.ndarray`` before scoring.
    treatment_col : int, default 11
        Index of the treatment-indicator column. Kept as a default so
        existing two-argument callers keep the original behavior.
    """
    # Convert once instead of twice; the same array is scored under both
    # treatment arms and the difference is the uplift estimate.
    features = np.array(x)
    treated = predict_treatment(model, treatment_col, 1)(features)
    control = predict_treatment(model, treatment_col, 0)(features)
    return treated - control


# Wrap the uplift model in a dalex explainer; the custom predict_function
# makes downstream explanations operate on predicted uplift
# (treated minus control) rather than raw model scores.
exp = dalex.Explainer(r_xgb_model,
                      X_train,
                      Y_train,
                      label="xgboost",
                      predict_function=calc_uplift_filled)

# NOTE(review): the returned profile object is discarded — confirm this call
# is wanted only for its side effects, or capture/plot the result.
exp.model_profile(type='partial')

#forty_ice_df = ice(data=train_X_imp_df[:5000], column='history',
#                   predict = lambda X: calc_uplift(r_xgb_model, X,treatment_col))
#ice_plot(forty_ice_df, c='dimgray', linewidth=0.3)
#plt.ylabel('Pred. AV %ile')
#plt.xlabel('history');
#ice_plot(forty_ice_df, linewidth=.5, plot_pdp=True,
#         pdp_kwargs={'c': 'k', 'linewidth': 3})
#plt.ylabel('Pred. AV %ile')
#plt.xlabel('history');
Example #20
0
    def pdp_comparator(self,
                       X,
                       y,
                       metric='abs_sum',
                       save_model_profiles=False,
                       variables=None,
                       calculate_metric_for_base_model=False):
        """Compare each model's PDP profile against the base model's profile.

        Parameters
        ----------
        X, y
            Data used to build the dalex explainers and model profiles.
        metric : str, default 'abs_sum'
            Metric name passed to
            ``RashomonSetAnalyser.distance_function_generator``.
        save_model_profiles : bool, default False
            Keep every computed profile on ``self.model_profiles``. This
            requires more memory, but lets ``pdp_comparator_change_metric``
            recompute different metrics quickly.
        variables : list of str, optional
            Restrict the comparison to this subset of feature names;
            ``None`` means all features.
        calculate_metric_for_base_model : bool, default False
            Also compute the base-vs-base distance column (sanity baseline).

        Returns
        -------
        pandas.DataFrame
            One row per variable (column 'colname') and one distance column
            per model; also stored on ``self.pdp_measures``.
        """
        distance = RashomonSetAnalyser.distance_function_generator(metric)

        def _make_profile(label, estimator):
            # Build the dalex model_profile for one (label, estimator) pair.
            explainer = dx.Explainer(estimator, X, y, label=label,
                                     verbose=False)
            if variables is None:
                return explainer.model_profile(verbose=False)
            return explainer.model_profile(verbose=False,
                                           variables=variables)

        def _distances(x_other, y_other):
            # Per-variable distance between the base curves and another
            # profile's curves; relies on each variable occupying an
            # equal-length contiguous segment of the flattened arrays.
            scores = []
            for i in range(len(df.colname)):
                lower = int(i * sample_length)
                higher = int((i + 1) * sample_length)
                scores.append(
                    distance(x_base[lower:higher], y_base[lower:higher],
                             x_other[lower:higher], y_other[lower:higher]))
            return scores

        profile_base = _make_profile(self.base_model[0], self.base_model[1])

        df = pd.DataFrame({'colname': profile_base.result._vname_.unique()})

        if save_model_profiles:
            self.model_profiles = [profile_base]

        y_base = profile_base.result._yhat_
        x_base = profile_base.result._x_

        # Equal number of grid points per variable, so the flat result arrays
        # split into len(df.colname) segments of this length.
        sample_length = y_base.size / profile_base.result._vname_.nunique()

        for model in self.models:
            profile = _make_profile(model[0], model[1])
            df[model[0]] = _distances(profile.result._x_,
                                      profile.result._yhat_)

            if save_model_profiles:
                self.model_profiles.append(profile)
            else:
                # Free the profile early when it is not being kept.
                del profile

        if calculate_metric_for_base_model:
            # Distance of the base model to itself — a sanity baseline.
            df[self.base_model[0]] = _distances(x_base, y_base)

        self.pdp_measures = df
        return df