예제 #1
0
    def test_model_user_validation_model_type(self, create_iris_data):
        """Check which ``model_type`` values ``dice_ml.Model`` accepts.

        'classifier' and 'regressor' must both produce a model object, while
        the unknown value 'random' must raise UserConfigValidationException.
        """
        x_train, _x_test, y_train, _y_test, _feature_names, _classes = create_iris_data
        fitted = self.create_sklearn_random_forest_classifier(x_train, y_train)

        for accepted_type in ('classifier', 'regressor'):
            wrapped = dice_ml.Model(model=fitted, backend='sklearn', model_type=accepted_type)
            assert wrapped is not None

        with pytest.raises(UserConfigValidationException):
            dice_ml.Model(model=fitted, backend='sklearn', model_type='random')
예제 #2
0
    def test_base_model_regression(self, create_boston_data):
        """Exercise the sklearn model wrapper with a random-forest regressor.

        Checks that ``get_output`` yields one value per test row (with the
        default arguments and with ``model_score=False``), that
        ``get_gradient`` raises NotImplementedError and that
        ``get_num_output_nodes2`` raises SystemException.
        """
        x_train, x_test, y_train, _y_test, _feature_names = create_boston_data
        regressor = self.create_sklearn_random_forest_regressor(x_train, y_train)

        diceml_model = dice_ml.Model(
            model=regressor, model_type='regressor', backend='sklearn')
        diceml_model.transformer.initialize_transform_func()
        assert diceml_model is not None

        n_rows = x_test.shape[0]

        # Default call: outputs are reshaped into a single column.
        scores = diceml_model.get_output(x_test).reshape(-1, 1)
        assert scores.shape[0] == n_rows
        assert scores.shape[1] == 1

        # Same check with model_score switched off.
        raw_predictions = diceml_model.get_output(
            x_test, model_score=False).reshape(-1, 1)
        assert raw_predictions.shape[0] == n_rows
        assert raw_predictions.shape[1] == 1

        with pytest.raises(NotImplementedError):
            diceml_model.get_gradient()

        with pytest.raises(SystemException):
            diceml_model.get_num_output_nodes2(x_test)
예제 #3
0
파일: test_model.py 프로젝트: rmazzine/DiCE
 def test_model_user_validation_no_valid_model(self):
     """Creating a Model with neither ``model`` nor ``model_path`` must fail.

     Expects a ValueError whose message matches the text below.
     """
     expected_message = "should provide either a trained model or the path to a model"
     with pytest.raises(ValueError, match=expected_message):
         dice_ml.Model(backend='sklearn')
def tf_model_object():
    """Return a ``dice_ml.Model`` for the adult-income model path.

    The backend name is 'TF' followed by the first character of
    ``tf.__version__``; the model is created with ``func='ohe-min-max'``.
    """
    tf_backend = 'TF' + tf.__version__[0]
    return dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=tf_backend),
        backend=tf_backend,
        func='ohe-min-max',
    )
예제 #5
0
    def on_button_clicked(b):
        """Generate counterfactuals for the current query and store them on ``explore``.

        Everything runs inside the ``button_output`` context, so the progress
        messages printed here are emitted within it.

        Parameters:
        b: the argument passed to the click callback; it is not used.

        Raises:
        ValueError: if the weight dropdown holds a value other than
            'Use Default Weights' or 'Choose Your Own Weights'. Previously this
            case failed later with an UnboundLocalError on ``dice_exp``.
        """
        with button_output:
            print("Generating explanations may take a few minutes...")
            print()

            # Set up the DiCE data, model and explainer objects.
            d = dice_ml.Data(dataframe=dataname, continuous_features=cont_feat, outcome_name=outcome_name)

            backend = 'TF' + tf.__version__[0]  # e.g. 'TF2'
            m = dice_ml.Model(model=modelname, backend=backend)

            exp = dice_ml.Dice(d, m)

            # Build the query instance from the currently selected values.
            query_instance = dict(zip(feature_names, explore.queryvaluestouse))

            # Only the custom-weights mode passes feature_weights; every other
            # argument is shared by both modes, so the call is made once below.
            weight_mode = f.weightdropdown.value
            if weight_mode == 'Use Default Weights':
                extra_kwargs = {}
            elif weight_mode == 'Choose Your Own Weights':
                extra_kwargs = {'feature_weights': dict(zip(f.useusing, f.weightvaluestouse))}
            else:
                raise ValueError(f"Unsupported weight option: {weight_mode!r}")

            dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=num_exp.value,
                                                    desired_class="opposite",
                                                    features_to_vary=f.useusing,
                                                    proximity_weight=prox.value,
                                                    diversity_weight=div.value,
                                                    **extra_kwargs)

            explore.dice_exp = dice_exp
예제 #6
0
 def _get_exp(self, backend, method="random"):
     """Build a ``dice_ml.Dice`` explainer on the adult-income dataset.

     The model is loaded from the adult-income model path for ``backend``
     and ``method`` is forwarded to ``dice_ml.Dice``.
     """
     data_interface = dice_ml.Data(
         dataframe=helpers.load_adult_income_dataset(),
         continuous_features=['age', 'hours_per_week'],
         outcome_name='income')
     model_interface = dice_ml.Model(
         model_path=helpers.get_adult_income_modelpath(backend=backend),
         backend=backend)
     return dice_ml.Dice(data_interface, model_interface, method=method)
예제 #7
0
def pyt_model_object():
    """Return a ``dice_ml.Model`` for the adult-income model path.

    Uses the 'PYT' backend and creates the model with ``func='ohe-min-max'``.
    """
    pyt_backend = 'PYT'
    return dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=pyt_backend),
        backend=pyt_backend,
        func='ohe-min-max',
    )
예제 #8
0
파일: model.py 프로젝트: indyfree/CARLA
    def __init__(self, mlmodel: MLModel, hyperparams: Optional[Dict] = None) -> None:
        """Set up the DiCE data, model and explainer objects for ``mlmodel``.

        Parameters:
        mlmodel: model wrapper; its ``backend`` must be "tensorflow" or
            "pytorch" and its ``data`` supplies the dataframe, the continuous
            and categorical feature lists and the target name.
        hyperparams: optional overrides merged with ``_DEFAULT_HYPERPARAMS``;
            the keys "num", "desired_class" and "posthoc_sparsity_param" are
            read from the merged result.

        Raises:
        ValueError: if ``mlmodel.backend`` is not a supported backend.
        """
        supported_backends = ["tensorflow", "pytorch"]
        if mlmodel.backend not in supported_backends:
            raise ValueError(
                f"{mlmodel.backend} is not in supported backends {supported_backends}"
            )

        super().__init__(mlmodel)

        self._continuous = mlmodel.data.continuous
        self._categorical = mlmodel.data.categorical
        self._target = mlmodel.data.target
        self._model = mlmodel

        params = merge_default_parameters(hyperparams, self._DEFAULT_HYPERPARAMS)

        # DiCE works on its own data/model wrappers; the model is handed over
        # through the "sklearn" backend and explained with the "random" method.
        self._dice_data = dice_ml.Data(
            dataframe=mlmodel.data.df,
            continuous_features=self._continuous,
            outcome_name=self._target,
        )
        self._dice_model = dice_ml.Model(model=mlmodel, backend="sklearn")
        self._dice = dice_ml.Dice(self._dice_data, self._dice_model, method="random")

        self._num = params["num"]
        self._desired_class = params["desired_class"]
        self._post_hoc_sparsity_param = params["posthoc_sparsity_param"]
예제 #9
0
    def test_numeric_categories(self, desired_range, method,
                                create_boston_data):
        """Counterfactuals can be generated when a numeric column is not continuous.

        Every feature except 'CHAS' is declared continuous, a random-forest
        regressor is fitted on the training split, and counterfactuals are
        requested for the first test row within ``desired_range`` using the
        given explainer ``method``.
        """
        x_train, x_test, y_train, y_test, feature_names = \
            create_boston_data

        regressor = RandomForestRegressor(n_estimators=10,
                                          max_depth=4,
                                          random_state=777)
        model = regressor.fit(x_train, y_train)

        dataset_train = x_train.copy()
        dataset_train['Outcome'] = y_train

        # Build a new list rather than calling feature_names.remove('CHAS'):
        # removing in place mutates the object handed out by the fixture, which
        # breaks later runs if that object is shared between tests.
        continuous_features = [name for name in feature_names if name != 'CHAS']

        d = dice_ml.Data(dataframe=dataset_train,
                         continuous_features=continuous_features,
                         outcome_name='Outcome')
        m = dice_ml.Model(model=model,
                          backend='sklearn',
                          model_type='regressor')
        exp = dice_ml.Dice(d, m, method=method)

        cf_explanation = exp.generate_counterfactuals(
            query_instances=x_test.iloc[0:1],
            total_CFs=10,
            desired_range=desired_range)

        assert cf_explanation is not None
예제 #10
0
    def test_base_model_classification(self, create_iris_data):
        """Exercise the sklearn model wrapper with a random-forest classifier.

        Checks the shape of the default ``get_output`` result (rows x classes),
        the shape and label set of the ``model_score=False`` result, that
        ``get_gradient`` raises NotImplementedError, and that
        ``get_num_output_nodes2`` equals the number of classes.
        """
        x_train, x_test, y_train, y_test, _feature_names, classes = create_iris_data
        classifier = self.create_sklearn_random_forest_classifier(x_train, y_train)

        diceml_model = dice_ml.Model(model=classifier, backend='sklearn')
        diceml_model.transformer.initialize_transform_func()
        assert diceml_model is not None

        n_rows = x_test.shape[0]
        n_classes = len(classes)

        probabilities = diceml_model.get_output(x_test)
        assert probabilities.shape[0] == n_rows
        assert probabilities.shape[1] == n_classes

        labels = diceml_model.get_output(x_test, model_score=False).reshape(-1, 1)
        assert labels.shape[0] == n_rows
        assert labels.shape[1] == 1
        assert np.all(np.unique(labels) == np.unique(y_test))

        with pytest.raises(NotImplementedError):
            diceml_model.get_gradient()

        assert diceml_model.get_num_output_nodes2(x_test) == n_classes
예제 #11
0
파일: test_dice.py 프로젝트: rmazzine/DiCE
 def _get_exp(self,
              backend,
              method="random",
              is_public_data_interface=True):
     """Build a ``dice_ml.Dice`` explainer for the adult-income model.

     When ``is_public_data_interface`` is true the data object is built from
     the loaded adult-income dataframe; otherwise it is built from a
     hand-written ``features`` dictionary without a dataframe. The model is
     loaded from the adult-income model path for ``backend`` and ``method``
     is forwarded to ``dice_ml.Dice``.
     """
     if is_public_data_interface:
         data_interface = dice_ml.Data(
             dataframe=helpers.load_adult_income_dataset(),
             continuous_features=['age', 'hours_per_week'],
             outcome_name='income')
     else:
         feature_spec = {
             'age': [17, 90],
             'workclass': [
                 'Government', 'Other/Unknown', 'Private', 'Self-Employed'
             ],
             'education': [
                 'Assoc', 'Bachelors', 'Doctorate', 'HS-grad', 'Masters',
                 'Prof-school', 'School', 'Some-college'
             ],
             'marital_status': [
                 'Divorced', 'Married', 'Separated', 'Single', 'Widowed'
             ],
             'occupation': [
                 'Blue-Collar', 'Other/Unknown', 'Professional', 'Sales',
                 'Service', 'White-Collar'
             ],
             'race': ['Other', 'White'],
             'gender': ['Female', 'Male'],
             'hours_per_week': [1, 99],
         }
         data_interface = dice_ml.Data(features=feature_spec,
                                       outcome_name='income')

     model_interface = dice_ml.Model(
         model_path=helpers.get_adult_income_modelpath(backend=backend),
         backend=backend)
     return dice_ml.Dice(data_interface, model_interface, method=method)
예제 #12
0
def regression_exp_object(method="random"):
    """Build a ``dice_ml.Dice`` explainer for the custom regression test data.

    Uses the sklearn backend with ``model_type='regressor'``; ``method`` is
    forwarded to ``dice_ml.Dice``.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_custom_testing_dataset_regression(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_regression(),
        backend='sklearn',
        model_type='regressor',
    )
    return dice_ml.Dice(data_interface, model_interface, method=method)
예제 #13
0
def binary_classification_exp_object_out_of_order(method="random"):
    """Build a ``dice_ml.Dice`` explainer on the outcome-not-last-column dataset.

    Uses the sklearn backend with the binary pipeline model path; ``method``
    is forwarded to ``dice_ml.Dice``.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_outcome_not_last_column_dataset(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_binary(),
        backend='sklearn',
    )
    return dice_ml.Dice(data_interface, model_interface, method=method)
예제 #14
0
def pyt_exp_object():
    """Build a ``dice_ml.Dice`` explainer on the adult-income dataset.

    The model is loaded from the adult-income model path for the 'PYT'
    backend; no ``method`` is passed to ``dice_ml.Dice``.
    """
    pyt_backend = 'PYT'
    data_interface = dice_ml.Data(
        dataframe=helpers.load_adult_income_dataset(),
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=pyt_backend),
        backend=pyt_backend,
    )
    return dice_ml.Dice(data_interface, model_interface)
예제 #15
0
def random_binary_classification_exp_object():
    """Build a ``dice_ml.Dice`` explainer with ``method='random'``.

    Uses the custom testing dataset and the sklearn backend with the custom
    pipeline model path.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_custom_testing_dataset(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline(),
        backend='sklearn',
    )
    return dice_ml.Dice(data_interface, model_interface, method='random')
예제 #16
0
def test_model_initiation_fullpath():
    """
    Check that a model is initiated when the backend parameter gives the full
    paths to a model class and an explainer class.

    The test is skipped when ``torch`` cannot be imported.
    """
    pytest.importorskip("torch")
    backend = dict(
        model='pytorch_model.PyTorchModel',
        explainer='dice_pytorch.DicePyTorch',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
    )
    assert isinstance(model_interface,
                      dice_ml.model_interfaces.pytorch_model.PyTorchModel)
예제 #17
0
    def _create_diceml_explainer(self, method, continuous_features):
        """Create a ``Dice`` explainer for the stored model and training data.

        The data object is built from ``self._train`` with
        ``self._target_column`` as outcome. The model type is the classifier
        constant when ``self._task_type`` is ``ModelTask.CLASSIFICATION`` and
        the regressor constant otherwise. ``method`` is forwarded to ``Dice``.
        """
        data_interface = dice_ml.Data(
            dataframe=self._train,
            continuous_features=continuous_features,
            outcome_name=self._target_column)

        if self._task_type == ModelTask.CLASSIFICATION:
            model_type = CounterfactualConstants.CLASSIFIER
        else:
            model_type = CounterfactualConstants.REGRESSOR

        model_interface = dice_ml.Model(
            model=self._model,
            backend=CounterfactualConstants.SKLEARN,
            model_type=model_type)

        return Dice(data_interface, model_interface, method=method)
def test_model_initiation_fullpath():
    """
    Check that a model is initiated when the backend parameter gives the full
    paths to a model class and an explainer class.

    The explainer path embeds the first character of ``tf.__version__``.
    """
    tf_major = tf.__version__[0]
    explainer_path = 'dice_tensorflow' + tf_major + '.DiceTensorFlow' + tf_major
    backend = {
        'model': 'keras_tensorflow_model.KerasTensorFlowModel',
        'explainer': explainer_path,
    }
    model_interface = dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
    )
    expected_cls = dice_ml.model_interfaces.keras_tensorflow_model.KerasTensorFlowModel
    assert isinstance(model_interface, expected_cls)
def get_explainer_object(model_path, model_backend, data_object):
    """
    Builds a DiCE explainer from a stored model and a DiCE data object.

    Parameters:
    model_path: path passed to dice_ml.Model as ``model_path``
    model_backend: indicates the implementation type of DiCE we want to use.
    data_object: DiCE data object

    Returns:
    explainer (object): the dice_ml.Dice instance created from the data object and the model

    """
    return dice_ml.Dice(
        data_object,
        dice_ml.Model(model_path=model_path, backend=model_backend),
    )
예제 #20
0
    def get_counterfactual(self, data_rows, y, ds):
        """Generate one DiCE counterfactual for each selected row of ``ds``.

        Parameters:
        data_rows: positional selector handed to ``X.iloc`` to pick the query
            rows.
        y: not used; it is overwritten by the labels returned from
            ``ds.pandas()``. Kept so the signature stays unchanged.
        ds: object whose ``pandas()`` method returns ``(X, y)``.

        Returns:
        DataFrame: the ``final_cfs_df`` of every generated explanation,
        concatenated and with the index reset.
        """
        # TODO: What about y?
        #        - I think the model is called on X again, so no need to pass prediction in again?
        X, y = ds.pandas()
        # Hand DiCE the frame that contains the outcome column; the original
        # built this frame but then passed the feature-only X instead.
        # NOTE(review): assumes the label column from ds.pandas() is named
        # 'income' — confirm.
        df = pd.concat((X, y), axis=1)
        d = dice_ml.Data(dataframe=df,
                         continuous_features=continous_columns,
                         outcome_name='income')
        backend = 'PYT'
        m = dice_ml.Model(model=self, backend=backend)
        exp = dice_ml.Dice(d, m)

        # 'records' is the documented orient name; the abbreviation 'record'
        # is rejected by pandas >= 2.0.
        instances = X.iloc[data_rows].to_dict(orient='records')
        res = []
        for instance in instances:
            dice_exp = exp.generate_counterfactuals(
                instance,
                total_CFs=1,
                desired_class="opposite",
                proximity_weight=0.5,
                diversity_weight=1,
                categorical_penalty=0.1,
                algorithm="DiverseCF",
                features_to_vary="all",
                yloss_type="hinge_loss",
                diversity_loss_type="dpp_style:inverse_dist",
                feature_weights="inverse_mad",
                optimizer="pytorch:adam",
                learning_rate=0.05,
                min_iter=500,
                max_iter=5000,
                project_iter=0,
                loss_diff_thres=1e-5,
                loss_converge_maxiter=1,
                verbose=False,
                init_near_query_instance=True,
                tie_random=False,
                stopping_threshold=0.5,
                posthoc_sparsity_param=0.1,
                posthoc_sparsity_algorithm="binary")
            res.append(dice_exp.final_cfs_df)
        return pd.concat(res).reset_index()
예제 #21
0
def generate_CF(instance):
    """Generate a single DiCE counterfactual for the first row of ``instance``.

    Parameters:
    instance: DataFrame holding the query; only its first row is used.

    Returns:
    The ``final_cfs_df`` attribute of the generated explanation.
    """
    X, y = Adult('dataset', train=True).pandas()
    ds = pd.concat((X, y), axis=1)
    d = dice_ml.Data(dataframe=ds, continuous_features=continous_columns, outcome_name='income')
    backend = 'PYT'
    # NOTE(review): FcNet() is instantiated here without loading any weights
    # in this function — confirm that is intended.
    model = FcNet()
    m = dice_ml.Model(model=model, backend=backend)
    exp = dice_ml.Dice(d, m)
    # 'records' is the documented orient name; the abbreviation 'record' is
    # rejected by pandas >= 2.0.
    query_records = instance.to_dict(orient='records')
    dice_exp = exp.generate_counterfactuals(query_records[0], total_CFs=1, desired_class="opposite",
                                            proximity_weight=0.5, diversity_weight=1, categorical_penalty=0.1,
                                            algorithm="DiverseCF", features_to_vary="all", yloss_type="hinge_loss",
                                            diversity_loss_type="dpp_style:inverse_dist",
                                            feature_weights="inverse_mad", optimizer="pytorch:adam",
                                            learning_rate=0.05, min_iter=500, max_iter=1000, project_iter=0,
                                            loss_diff_thres=1e-5, loss_converge_maxiter=1, verbose=False,
                                            init_near_query_instance=True, tie_random=False,
                                            stopping_threshold=0.5, posthoc_sparsity_param=0.1,
                                            posthoc_sparsity_algorithm="binary")
    return dice_exp.final_cfs_df
예제 #22
0
파일: conftest.py 프로젝트: rmazzine/DiCE
def sklearn_multiclass_classification_model_interface():
    """Return a sklearn-backend ``dice_ml.Model`` of type 'classifier'.

    The model is loaded from the custom multiclass pipeline model path.
    """
    return dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_multiclass(),
        backend='sklearn',
        model_type='classifier',
    )
예제 #23
0
파일: conftest.py 프로젝트: rmazzine/DiCE
def sklearn_regression_model_interface():
    """Return a sklearn-backend ``dice_ml.Model`` of type 'regressor'.

    The model is loaded from the custom regression pipeline model path.
    """
    ML_modelpath = helpers.get_custom_dataset_modelpath_pipeline_regression()
    # The model type is spelled 'regressor' (not 'regression'), matching the
    # other regression model setups that use dice_ml.Model.
    m = dice_ml.Model(model_path=ML_modelpath,
                      backend='sklearn',
                      model_type='regressor')
    return m
예제 #24
0
# Predict on X_test and print the macro-averaged F1 score and the accuracy
# against y_test.
y_pred = rf.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Create diverse counterfactual explanations
# pip install dice-ml
import dice_ml
# Dataset: wrap the loaded data and name the outcome column ('stroke')
data_dice = dice_ml.Data(
    dataframe=data_loader.data,
    # For perturbation strategy
    continuous_features=['age', 'avg_glucose_level', 'bmi'],
    outcome_name='stroke')
# Model: wrap the model object `rf` for DiCE
rf_dice = dice_ml.Model(
    model=rf,
    # There exist backends for tf, torch, ...
    backend="sklearn")
# Explainer: combines the data and model wrappers
explainer = dice_ml.Dice(
    data_dice,
    rf_dice,
    # Random sampling, genetic algorithm, kd-tree,...
    method="random")

# %% Create explanation
# Generate CF based on the blackbox model
# The query is the 0:1 slice of X_test; three counterfactuals are requested
# with the "opposite" desired class.
input_datapoint = X_test[0:1]
cf = explainer.generate_counterfactuals(input_datapoint,
                                        total_CFs=3,
                                        desired_class="opposite")
# Visualize it, showing only the values that changed
cf.visualize_as_dataframe(show_only_changes=True)
예제 #25
0
 def _get_model(self, backend):
     """Return a ``dice_ml.Model`` loaded from the adult-income model path for ``backend``."""
     return dice_ml.Model(
         model_path=helpers.get_adult_income_modelpath(backend=backend),
         backend=backend)
예제 #26
0
 def test_model_user_validation_no_valid_model(self):
     """Creating a Model with neither ``model`` nor ``model_path`` must raise ValueError."""
     incomplete_kwargs = {'backend': 'sklearn'}
     with pytest.raises(ValueError):
         dice_ml.Model(**incomplete_kwargs)