Example #1
    def test_global_feature_importance_error_conditions_with_insufficient_query_points(
            self, method, sample_custom_query_1, custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)

        cf_explanations = exp.generate_counterfactuals(
            query_instances=sample_custom_query_1, total_CFs=15)

        with pytest.raises(
                UserConfigValidationException,
                match=
                "The number of points for which counterfactuals generated should be "
                "greater than or equal to 10 "
                "to compute global feature importance"):
            exp.global_feature_importance(
                query_instances=None,
                cf_examples_list=cf_explanations.cf_examples_list)

        with pytest.raises(
                UserConfigValidationException,
                match=
                "The number of query instances should be greater than or equal to 10 "
                "to compute global feature importance over all query points"):
            exp.global_feature_importance(
                query_instances=sample_custom_query_1, total_CFs=15)
Example #2
    def test_check_any_counterfactuals_computed(
            self, method, custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)

        sample_custom_query = custom_public_data_interface.data_df[0:1]
        cf_example = CounterfactualExamples(
            data_interface=custom_public_data_interface,
            test_instance_df=sample_custom_query)
        cf_examples_arr = [cf_example]

        with pytest.raises(
                UserConfigValidationException,
                match=
                "No counterfactuals found for any of the query points! Kindly check your configuration."
        ):
            exp._check_any_counterfactuals_computed(
                cf_examples_arr=cf_examples_arr)

        cf_example_has_cf = CounterfactualExamples(
            data_interface=custom_public_data_interface,
            final_cfs_df=sample_custom_query,
            test_instance_df=sample_custom_query)
        cf_example_no_cf = CounterfactualExamples(
            data_interface=custom_public_data_interface,
            test_instance_df=sample_custom_query)
        cf_examples_arr = [cf_example_has_cf, cf_example_no_cf]
        exp._check_any_counterfactuals_computed(
            cf_examples_arr=cf_examples_arr)
Example #3
    def on_button_clicked(b):
        with button_output:
            print("Generating explanations may take a few minutes...")
            print()

            # Setting up
            d = dice_ml.Data(dataframe=dataname, continuous_features=cont_feat, outcome_name=outcome_name)

            backend = 'TF'+tf.__version__[0] # TF2
            m = dice_ml.Model(model=modelname, backend=backend)

            exp = dice_ml.Dice(d, m)

            # Generating CFs
            query_instance = dict(zip(feature_names, explore.queryvaluestouse))

            if f.weightdropdown.value=='Use Default Weights':
                dice_exp = exp.generate_counterfactuals(query_instance,total_CFs=num_exp.value, desired_class="opposite",
                                                        features_to_vary=f.useusing,
                                                        proximity_weight=prox.value, diversity_weight=div.value)
            elif f.weightdropdown.value=='Choose Your Own Weights':
                # Putting weights into a dict
                weightstouse=dict(zip(f.useusing, f.weightvaluestouse))
                dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=num_exp.value, desired_class="opposite",
                                                        features_to_vary=f.useusing, feature_weights=weightstouse,
                                                        proximity_weight=prox.value, diversity_weight=div.value)

            explore.dice_exp=dice_exp
Example #4
    def __init__(self, mlmodel: MLModel, hyperparams: Optional[Dict] = None) -> None:

        supported_backends = ["tensorflow", "pytorch"]
        if mlmodel.backend not in supported_backends:
            raise ValueError(
                f"{mlmodel.backend} is not in supported backends {supported_backends}"
            )

        super().__init__(mlmodel)
        self._continuous = mlmodel.data.continuous
        self._categorical = mlmodel.data.categorical
        self._target = mlmodel.data.target
        self._model = mlmodel

        checked_hyperparams = merge_default_parameters(
            hyperparams, self._DEFAULT_HYPERPARAMS
        )
        # Prepare data for dice data structure
        self._dice_data = dice_ml.Data(
            dataframe=mlmodel.data.df,
            continuous_features=self._continuous,
            outcome_name=self._target,
        )

        self._dice_model = dice_ml.Model(model=mlmodel, backend="sklearn")

        self._dice = dice_ml.Dice(self._dice_data, self._dice_model, method="random")
        self._num = checked_hyperparams["num"]
        self._desired_class = checked_hyperparams["desired_class"]
        self._post_hoc_sparsity_param = checked_hyperparams["posthoc_sparsity_param"]
Example #5
 def _get_exp(self,
              backend,
              method="random",
              is_public_data_interface=True):
     if is_public_data_interface:
         dataset = helpers.load_adult_income_dataset()
         d = dice_ml.Data(dataframe=dataset,
                          continuous_features=['age', 'hours_per_week'],
                          outcome_name='income')
     else:
         d = dice_ml.Data(features={
             'age': [17, 90],
             'workclass':
             ['Government', 'Other/Unknown', 'Private', 'Self-Employed'],
             'education': [
                 'Assoc', 'Bachelors', 'Doctorate', 'HS-grad', 'Masters',
                 'Prof-school', 'School', 'Some-college'
             ],
             'marital_status':
             ['Divorced', 'Married', 'Separated', 'Single', 'Widowed'],
             'occupation': [
                 'Blue-Collar', 'Other/Unknown', 'Professional', 'Sales',
                 'Service', 'White-Collar'
             ],
             'race': ['Other', 'White'],
             'gender': ['Female', 'Male'],
             'hours_per_week': [1, 99]
         },
                          outcome_name='income')
     ML_modelpath = helpers.get_adult_income_modelpath(backend=backend)
     m = dice_ml.Model(model_path=ML_modelpath, backend=backend)
     exp = dice_ml.Dice(d, m, method=method)
     return exp
Example #6
 def _get_exp(self, backend, method="random"):
     dataset = helpers.load_adult_income_dataset()
     d = dice_ml.Data(dataframe=dataset, continuous_features=['age', 'hours_per_week'], outcome_name='income')
     ML_modelpath = helpers.get_adult_income_modelpath(backend=backend)
     m = dice_ml.Model(model_path=ML_modelpath, backend=backend)
     exp = dice_ml.Dice(d, m, method=method)
     return exp
Example #7
    def test_local_feature_importance_error_conditions_with_insufficient_cfs_per_query_point(
            self, desired_class, method, sample_custom_query_1,
            custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)

        cf_explanations = exp.generate_counterfactuals(
            query_instances=sample_custom_query_1,
            total_CFs=1,
            desired_class=desired_class)

        with pytest.raises(
                UserConfigValidationException,
                match=
                "The number of counterfactuals generated per query instance should be "
                "greater than or equal to 10 to compute feature importance for all query points"
        ):
            exp.local_feature_importance(
                query_instances=None,
                cf_examples_list=cf_explanations.cf_examples_list)

        with pytest.raises(
                UserConfigValidationException,
                match="The number of counterfactuals requested per "
                "query instance should be greater than or equal to 10 "
                "to compute feature importance for all query points"):
            exp.local_feature_importance(query_instances=sample_custom_query_1,
                                         total_CFs=1,
                                         desired_class=desired_class)
Example #8
    def test_desired_class(self, desired_class, total_CFs, method,
                           genetic_initialization, sample_custom_query_2,
                           custom_public_data_interface,
                           sklearn_multiclass_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_multiclass_classification_model_interface,
                           method=method)

        if method != 'genetic':
            ans = exp.generate_counterfactuals(
                query_instances=sample_custom_query_2,
                total_CFs=total_CFs,
                desired_class=desired_class)
        else:
            ans = exp.generate_counterfactuals(
                query_instances=sample_custom_query_2,
                total_CFs=total_CFs,
                desired_class=desired_class,
                initialization=genetic_initialization)

        assert ans is not None
        if method != 'kdtree':
            assert all(ans.cf_examples_list[0].final_cfs_df[
                exp.data_interface.outcome_name].values == [desired_class] *
                       total_CFs)
        else:
            assert all(ans.cf_examples_list[0].final_cfs_df_sparse[
                exp.data_interface.outcome_name].values == [desired_class] *
                       total_CFs)
        assert all(i == desired_class for i in exp.cfs_preds)
Example #9
    def test_numeric_categories(self, desired_range, method,
                                create_boston_data):
        x_train, x_test, y_train, y_test, feature_names = \
            create_boston_data

        rfc = RandomForestRegressor(n_estimators=10,
                                    max_depth=4,
                                    random_state=777)
        model = rfc.fit(x_train, y_train)

        dataset_train = x_train.copy()
        dataset_train['Outcome'] = y_train
        # Drop 'CHAS' from the continuous feature list so that it is treated
        # as a categorical feature with numeric category values.
        feature_names.remove('CHAS')

        d = dice_ml.Data(dataframe=dataset_train,
                         continuous_features=feature_names,
                         outcome_name='Outcome')
        m = dice_ml.Model(model=model,
                          backend='sklearn',
                          model_type='regressor')
        exp = dice_ml.Dice(d, m, method=method)

        cf_explanation = exp.generate_counterfactuals(
            query_instances=x_test.iloc[0:1],
            total_CFs=10,
            desired_range=desired_range)

        assert cf_explanation is not None
Example #10
def random_binary_classification_exp_object():
    backend = 'sklearn'
    dataset = helpers.load_custom_testing_dataset()
    d = dice_ml.Data(dataframe=dataset, continuous_features=['Numerical'], outcome_name='Outcome')
    ML_modelpath = helpers.get_custom_dataset_modelpath_pipeline()
    m = dice_ml.Model(model_path=ML_modelpath, backend=backend)
    exp = dice_ml.Dice(d, m, method='random')
    return exp
Example #11
def pyt_exp_object():
    backend = 'PYT'
    dataset = helpers.load_adult_income_dataset()
    d = dice_ml.Data(dataframe=dataset, continuous_features=['age', 'hours_per_week'], outcome_name='income')
    ML_modelpath = helpers.get_adult_income_modelpath(backend=backend)
    m = dice_ml.Model(model_path=ML_modelpath, backend=backend)
    exp = dice_ml.Dice(d, m)
    return exp
Example #12
def regression_exp_object(method="random"):
    backend = 'sklearn'
    dataset = helpers.load_custom_testing_dataset_regression()
    d = dice_ml.Data(dataframe=dataset, continuous_features=['Numerical'], outcome_name='Outcome')
    ML_modelpath = helpers.get_custom_dataset_modelpath_pipeline_regression()
    m = dice_ml.Model(model_path=ML_modelpath, backend=backend, model_type='regressor')
    exp = dice_ml.Dice(d, m, method=method)
    return exp
Example #13
def binary_classification_exp_object_out_of_order(method="random"):
    backend = 'sklearn'
    dataset = helpers.load_outcome_not_last_column_dataset()
    d = dice_ml.Data(dataframe=dataset, continuous_features=['Numerical'], outcome_name='Outcome')
    ML_modelpath = helpers.get_custom_dataset_modelpath_pipeline_binary()
    m = dice_ml.Model(model_path=ML_modelpath, backend=backend)
    exp = dice_ml.Dice(d, m, method=method)
    return exp
Example #14
 def test_query_instance_unknown_column(
         self, desired_class, method, sample_custom_query_5,
         custom_public_data_interface,
         sklearn_binary_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     with pytest.raises(ValueError, match='not present in training data'):
         exp.generate_counterfactuals(query_instances=sample_custom_query_5,
                                      total_CFs=3,
                                      desired_class=desired_class)
Example #15
 def test_zero_totalcfs(self, desired_class, method, sample_custom_query_1,
                        custom_public_data_interface,
                        sklearn_multiclass_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_multiclass_classification_model_interface,
                        method=method)
     with pytest.raises(UserConfigValidationException):
         exp.generate_counterfactuals(
             query_instances=[sample_custom_query_1],
             total_CFs=0,
             desired_class=desired_class)
Example #16
 def test_query_instance_outside_bounds(
         self, desired_class, method, sample_custom_query_3,
         custom_public_data_interface,
         sklearn_binary_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     with pytest.raises(ValueError,
                        match='has a value outside the dataset'):
         exp.generate_counterfactuals(query_instances=sample_custom_query_3,
                                      total_CFs=1,
                                      desired_class=desired_class)
Example #17
 def test_unsupported_binary_class(
         self, desired_class, method, sample_custom_query_1,
         custom_public_data_interface,
         sklearn_binary_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     with pytest.raises(UserConfigValidationException) as ucve:
         exp.generate_counterfactuals(query_instances=sample_custom_query_1,
                                      total_CFs=3,
                                      desired_class=desired_class)
     if desired_class == 100:
         assert "Desired class not present in training data!" in str(ucve)
     else:
         assert "The target class for {0} could not be identified".format(
             desired_class) in str(ucve)
Example #18
 def test_incorrect_features_to_vary_list(
         self, desired_class, method, sample_custom_query_1,
         custom_public_data_interface,
         sklearn_binary_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     with pytest.raises(UserConfigValidationException,
                        match="Got features {" + "'unknown_feature'" +
                        "} which are not present in training data"):
         exp.generate_counterfactuals(query_instances=sample_custom_query_1,
                                      total_CFs=10,
                                      desired_class=desired_class,
                                      desired_range=None,
                                      permitted_range=None,
                                      features_to_vary=['unknown_feature'])
Example #19
 def test_unsupported_multiclass(
         self, desired_class, total_CFs, method, sample_custom_query_4,
         custom_public_data_interface,
         sklearn_multiclass_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_multiclass_classification_model_interface,
                        method=method)
     with pytest.raises(UserConfigValidationException) as ucve:
         exp.generate_counterfactuals(query_instances=sample_custom_query_4,
                                      total_CFs=total_CFs,
                                      desired_class=desired_class)
     if desired_class == 100:
         assert "Desired class not present in training data!" in str(ucve)
     else:
         assert "Desired class cannot be opposite if the number of classes is more than 2." in str(
             ucve)
Example #20
 def test_desired_class(self, desired_class, method, sample_custom_query_2,
                        custom_public_data_interface,
                        sklearn_binary_classification_model_interface):
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     ans = exp.generate_counterfactuals(
         query_instances=sample_custom_query_2,
         features_to_vary='all',
         total_CFs=2,
         desired_class=desired_class,
         permitted_range=None)
     if method != 'kdtree':
         assert all(ans.cf_examples_list[0].final_cfs_df[
             exp.data_interface.outcome_name].values == [desired_class] * 2)
     else:
         assert all(ans.cf_examples_list[0].final_cfs_df_sparse[
             exp.data_interface.outcome_name].values == [desired_class] * 2)
Example #21
    def test_incorrect_values_permitted_range(
            self, desired_class, method, sample_custom_query_1,
            custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)
        with pytest.raises(UserConfigValidationException) as ucve:
            exp.generate_counterfactuals(
                query_instances=sample_custom_query_1,
                total_CFs=10,
                desired_class=desired_class,
                desired_range=None,
                permitted_range={'Categorical': ['d']},
                features_to_vary='all')

        assert 'The category {0} does not occur in the training data for feature {1}. Allowed categories are {2}'.format(
            'd', 'Categorical', ['a', 'b', 'c']) in str(ucve)
Example #22
def get_explainer_object(model_path, model_backend, data_object):
    """
    Creates a DiCE explainer that can generate counterfactual explanations
    and feature importances for the model.

    Parameters:
    model_path: path to the trained model
    model_backend: the DiCE backend to use (indicates the framework the model was implemented in)
    data_object: DiCE data object

    Returns:
    explainer (object): DiCE explainer for the given data and model
    """
    model_object = dice_ml.Model(model_path=model_path, backend=model_backend)

    explainer = dice_ml.Dice(data_object, model_object)

    return explainer
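A minimal usage sketch for the helper above, assuming the adult-income dataset and model-path helpers used elsewhere in these examples; the backend choice and query row are illustrative, not part of the original example:

# Illustrative only: build a DiCE Data object, reuse get_explainer_object to
# construct the explainer, and request counterfactuals for a single query row.
import dice_ml
from dice_ml.utils import helpers

dataset = helpers.load_adult_income_dataset()
data_object = dice_ml.Data(dataframe=dataset,
                           continuous_features=['age', 'hours_per_week'],
                           outcome_name='income')
model_path = helpers.get_adult_income_modelpath(backend='PYT')
explainer = get_explainer_object(model_path, 'PYT', data_object)

query_instance = dataset.drop(columns=['income']).iloc[0].to_dict()
dice_exp = explainer.generate_counterfactuals(query_instance,
                                              total_CFs=4,
                                              desired_class="opposite")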
Example #23
 def test_zero_cfs_internal(self, method, features_to_vary, desired_class,
                            desired_range, sample_custom_query_2, total_CFs,
                            permitted_range, custom_public_data_interface,
                            sklearn_binary_classification_model_interface):
     if method == 'genetic':
         pytest.skip(
             'DiceGenetic explainer does not handle the total counterfactuals as zero'
         )
     exp = dice_ml.Dice(custom_public_data_interface,
                        sklearn_binary_classification_model_interface,
                        method=method)
     features_to_vary = exp.setup(features_to_vary, None,
                                  sample_custom_query_2, "inverse_mad")
     exp._generate_counterfactuals(features_to_vary=features_to_vary,
                                   query_instance=sample_custom_query_2,
                                   total_CFs=total_CFs,
                                   desired_class=desired_class,
                                   desired_range=desired_range,
                                   permitted_range=permitted_range)
Example #24
    def get_counterfactual(self, data_rows, y, ds):
        # TODO: What about y?
        #        - I think the model is called on X again, so no need to pass prediction in again?
        X, y = ds.pandas()
        df = pd.concat((X, y), axis=1)
        # dice_ml.Data expects the outcome column to be present in the dataframe,
        # so pass the concatenated frame rather than X alone.
        d = dice_ml.Data(dataframe=df,
                         continuous_features=continous_columns,
                         outcome_name='income')
        backend = 'PYT'
        m = dice_ml.Model(model=self, backend=backend)
        exp = dice_ml.Dice(d, m)

        instances = pd.DataFrame.to_dict(X.iloc[data_rows], orient='records')
        res = []
        for i in range(len(instances)):
            dice_exp = exp.generate_counterfactuals(
                instances[i],
                total_CFs=1,
                desired_class="opposite",
                proximity_weight=0.5,
                diversity_weight=1,
                categorical_penalty=0.1,
                algorithm="DiverseCF",
                features_to_vary="all",
                yloss_type="hinge_loss",
                diversity_loss_type="dpp_style:inverse_dist",
                feature_weights="inverse_mad",
                optimizer="pytorch:adam",
                learning_rate=0.05,
                min_iter=500,
                max_iter=5000,
                project_iter=0,
                loss_diff_thres=1e-5,
                loss_converge_maxiter=1,
                verbose=False,
                init_near_query_instance=True,
                tie_random=False,
                stopping_threshold=0.5,
                posthoc_sparsity_param=0.1,
                posthoc_sparsity_algorithm="binary")
            res.append(dice_exp.final_cfs_df)
        return pd.concat(res).reset_index()
Example #25
def generate_CF(instance):
    X, y = Adult('dataset', train=True).pandas()
    ds = pd.concat((X, y), axis=1)
    d = dice_ml.Data(dataframe=ds, continuous_features=continous_columns, outcome_name='income')
    backend = 'PYT'
    model = FcNet()
    m = dice_ml.Model(model=model, backend=backend)
    exp = dice_ml.Dice(d, m)
    instance = pd.DataFrame.to_dict(instance, orient='records')
    dice_exp = exp.generate_counterfactuals(instance[0], total_CFs=1, desired_class="opposite",
                                            proximity_weight=0.5, diversity_weight=1, categorical_penalty=0.1, 
                                            algorithm="DiverseCF", features_to_vary="all", yloss_type="hinge_loss", 
                                            diversity_loss_type="dpp_style:inverse_dist", 
                                            feature_weights="inverse_mad", optimizer="pytorch:adam", 
                                            learning_rate=0.05, min_iter=500, max_iter=1000, project_iter=0, 
                                            loss_diff_thres=1e-5, loss_converge_maxiter=1, verbose=False, 
                                            init_near_query_instance=True, tie_random=False, 
                                            stopping_threshold=0.5, posthoc_sparsity_param=0.1, 
                                            posthoc_sparsity_algorithm="binary")
    res = dice_exp.final_cfs_df
    return res
Example #26
    def test_local_feature_importance(
            self, desired_class, method, sample_custom_query_1,
            sample_counterfactual_example_dummy, custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)
        sample_custom_query = pd.concat(
            [sample_custom_query_1, sample_custom_query_1])
        cf_explanations = exp.generate_counterfactuals(
            query_instances=sample_custom_query,
            total_CFs=15,
            desired_class=desired_class)

        cf_explanations.cf_examples_list[
            0].final_cfs_df = sample_counterfactual_example_dummy.copy()
        cf_explanations.cf_examples_list[
            0].final_cfs_df_sparse = sample_counterfactual_example_dummy.copy(
            )
        cf_explanations.cf_examples_list[0].final_cfs_df.drop([0, 1, 2],
                                                              inplace=True)
        cf_explanations.cf_examples_list[0].final_cfs_df_sparse.drop(
            [0, 1, 2], inplace=True)

        cf_explanations.cf_examples_list[
            1].final_cfs_df = sample_counterfactual_example_dummy.copy()
        cf_explanations.cf_examples_list[
            1].final_cfs_df_sparse = sample_counterfactual_example_dummy.copy(
            )
        cf_explanations.cf_examples_list[1].final_cfs_df.drop([0],
                                                              inplace=True)
        cf_explanations.cf_examples_list[1].final_cfs_df_sparse.drop(
            [0], inplace=True)

        local_importances = exp.local_feature_importance(
            query_instances=None,
            cf_examples_list=cf_explanations.cf_examples_list)

        for local_importance in local_importances.local_importance:
            self._verify_feature_importance(local_importance)
Example #27
    def test_global_feature_importance(
            self, desired_class, method, sample_custom_query_10,
            sample_counterfactual_example_dummy, custom_public_data_interface,
            sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)

        cf_explanations = exp.generate_counterfactuals(
            query_instances=sample_custom_query_10,
            total_CFs=15,
            desired_class=desired_class)

        cf_explanations.cf_examples_list[
            0].final_cfs_df = sample_counterfactual_example_dummy.copy()
        cf_explanations.cf_examples_list[
            0].final_cfs_df_sparse = sample_counterfactual_example_dummy.copy(
            )
        cf_explanations.cf_examples_list[0].final_cfs_df.drop([0, 1, 2, 3, 4],
                                                              inplace=True)
        cf_explanations.cf_examples_list[0].final_cfs_df_sparse.drop(
            [0, 1, 2, 3, 4], inplace=True)

        for index in range(1, len(cf_explanations.cf_examples_list)):
            cf_explanations.cf_examples_list[
                index].final_cfs_df = sample_counterfactual_example_dummy.copy(
                )
            cf_explanations.cf_examples_list[
                index].final_cfs_df_sparse = sample_counterfactual_example_dummy.copy(
                )

        global_importance = exp.global_feature_importance(
            query_instances=None,
            cf_examples_list=cf_explanations.cf_examples_list)

        self._verify_feature_importance(global_importance.summary_importance)
Example #28
    def test_permitted_range(self, desired_class, method, total_CFs,
                             permitted_range, sample_custom_query_2,
                             custom_public_data_interface,
                             sklearn_binary_classification_model_interface):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)
        ans = exp.generate_counterfactuals(
            query_instances=sample_custom_query_2,
            permitted_range=permitted_range,
            total_CFs=total_CFs,
            desired_class=desired_class)

        for feature in permitted_range:
            if method != 'kdtree':
                assert all(
                    permitted_range[feature][0] <=
                    ans.cf_examples_list[0].final_cfs_df[feature].values[i] <=
                    permitted_range[feature][1] for i in range(total_CFs))
            else:
                assert all(
                    permitted_range[feature][0] <= ans.cf_examples_list[0].
                    final_cfs_df_sparse[feature].values[i] <=
                    permitted_range[feature][1] for i in range(total_CFs))
Example #29
# pip install dice-ml
import dice_ml
# Dataset
data_dice = dice_ml.Data(
    dataframe=data_loader.data,
    # For perturbation strategy
    continuous_features=['age', 'avg_glucose_level', 'bmi'],
    outcome_name='stroke')
# Model
rf_dice = dice_ml.Model(
    model=rf,
    # There exist backends for tf, torch, ...
    backend="sklearn")
explainer = dice_ml.Dice(
    data_dice,
    rf_dice,
    # Random sampling, genetic algorithm, kd-tree,...
    method="random")

# %% Create explanation
# Generate CF based on the blackbox model
input_datapoint = X_test[0:1]
cf = explainer.generate_counterfactuals(input_datapoint,
                                        total_CFs=3,
                                        desired_class="opposite")
# Visualize it
cf.visualize_as_dataframe(show_only_changes=True)

# %% Create feasible (conditional) Counterfactuals
features_to_vary = ['avg_glucose_level', 'bmi', 'smoking_status_smokes']
permitted_range = {'avg_glucose_level': [50, 250], 'bmi': [18, 35]}
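The snippet stops after defining the constraints; a plausible continuation, reusing the generate_counterfactuals parameters shown in the other examples (this call is a sketch, not part of the original snippet):

# Sketch: apply the feasibility constraints defined above to a new CF query
cf_feasible = explainer.generate_counterfactuals(input_datapoint,
                                                 total_CFs=3,
                                                 desired_class="opposite",
                                                 permitted_range=permitted_range,
                                                 features_to_vary=features_to_vary)
cf_feasible.visualize_as_dataframe(show_only_changes=True)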
Example #30
    def test_generate_counterfactuals_user_config_validations(
            self, method, sample_custom_query_2, custom_public_data_interface,
            sklearn_binary_classification_model_interface, explainer_function):
        exp = dice_ml.Dice(custom_public_data_interface,
                           sklearn_binary_classification_model_interface,
                           method=method)

        explainer_function = getattr(exp, explainer_function)
        with pytest.raises(
                UserConfigValidationException,
                match=
                r"The number of counterfactuals generated per query instance \(total_CFs\) "
                "should be a positive integer."):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=-10,
                               desired_class='opposite')

        with pytest.raises(
                UserConfigValidationException,
                match=
                r"The number of counterfactuals generated per query instance \(total_CFs\) "
                "should be a positive integer."):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=0,
                               desired_class="opposite")

        with pytest.raises(
                UserConfigValidationException,
                match=
                r"The posthoc_sparsity_algorithm should be linear or binary and not random"
        ):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=10,
                               posthoc_sparsity_algorithm='random')

        with pytest.raises(
                UserConfigValidationException,
                match=r'The stopping_threshold should lie between 0.0 and 1.0'
        ):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=10,
                               stopping_threshold=-10.0)

        with pytest.raises(
                UserConfigValidationException,
                match=
                r'The posthoc_sparsity_param should lie between 0.0 and 1.0'):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=10,
                               posthoc_sparsity_param=-10.0)

        with pytest.raises(
                UserConfigValidationException,
                match=
                r'The desired_range parameter should not be set for classification task'
        ):
            explainer_function(query_instances=sample_custom_query_2,
                               total_CFs=10,
                               desired_range=[0, 10])