def test_model_user_validation_model_type(self, create_iris_data):
    """Check which ``model_type`` values ``dice_ml.Model`` accepts.

    With a fitted sklearn classifier, both 'classifier' and 'regressor' must
    yield a model wrapper, while the unknown value 'random' must raise
    ``UserConfigValidationException``.
    """
    # Only the training split is needed; the rest of the fixture is ignored.
    x_train, _x_test, y_train, _y_test, _feature_names, _classes = create_iris_data
    fitted = self.create_sklearn_random_forest_classifier(x_train, y_train)

    for accepted_type in ('classifier', 'regressor'):
        wrapper = dice_ml.Model(model=fitted, backend='sklearn',
                                model_type=accepted_type)
        assert wrapper is not None

    with pytest.raises(UserConfigValidationException):
        dice_ml.Model(model=fitted, backend='sklearn', model_type='random')
def test_base_model_regression(self, create_boston_data):
    """Exercise the sklearn-backed DiCE model wrapper with a regressor.

    Verifies that ``get_output`` (with and without ``model_score=False``)
    yields one value per test row, that ``get_gradient`` is not implemented,
    and that ``get_num_output_nodes2`` raises ``SystemException``.
    """
    x_train, x_test, y_train, _y_test, _feature_names = create_boston_data
    regressor = self.create_sklearn_random_forest_regressor(x_train, y_train)

    diceml_model = dice_ml.Model(model=regressor, model_type='regressor',
                                 backend='sklearn')
    diceml_model.transformer.initialize_transform_func()
    assert diceml_model is not None

    # Default call (model_score left at its default).
    scores = diceml_model.get_output(x_test).reshape(-1, 1)
    assert scores.shape[0] == x_test.shape[0]
    assert scores.shape[1] == 1

    # Explicit model_score=False call.
    raw_predictions = diceml_model.get_output(x_test, model_score=False)
    raw_predictions = raw_predictions.reshape(-1, 1)
    assert raw_predictions.shape[0] == x_test.shape[0]
    assert raw_predictions.shape[1] == 1

    with pytest.raises(NotImplementedError):
        diceml_model.get_gradient()

    with pytest.raises(SystemException):
        diceml_model.get_num_output_nodes2(x_test)
def test_model_user_validation_no_valid_model(self):
    """``dice_ml.Model`` without a model or a model path must raise ``ValueError``.

    The error message is also checked against the expected wording.
    """
    expected_message = "should provide either a trained model or the path to a model"
    with pytest.raises(ValueError, match=expected_message):
        dice_ml.Model(backend='sklearn')
def tf_model_object():
    """Build a DiCE model wrapper for the adult-income TensorFlow model.

    The backend string is 'TF' followed by the first character of the
    installed TensorFlow version; the model is loaded from the path the test
    helpers report for that backend, using the 'ohe-min-max' transform.
    """
    tf_major = tf.__version__[0]
    backend = 'TF' + tf_major
    model_path = helpers.get_adult_income_modelpath(backend=backend)
    return dice_ml.Model(model_path=model_path, backend=backend,
                         func='ohe-min-max')
def on_button_clicked(b):
    """Generate counterfactuals for the current widget selections.

    Builds the DiCE data/model/explainer objects from names in the enclosing
    scope, runs ``generate_counterfactuals`` for the query instance currently
    held in ``explore``, and stores the result on ``explore.dice_exp``.

    Parameters:
        b: the object passed by the button callback (unused).

    Raises:
        ValueError: if the weight dropdown holds a value other than
            'Use Default Weights' or 'Choose Your Own Weights'. Previously
            this case failed later with an unbound-variable error.
    """
    # NOTE(review): the original layout was lost; this assumes the whole body
    # runs inside the ``button_output`` context -- confirm against the notebook.
    with button_output:
        print("Generating explanations may take a few minutes...")
        print()

        # Setting up
        d = dice_ml.Data(dataframe=dataname, continuous_features=cont_feat,
                         outcome_name=outcome_name)
        backend = 'TF' + tf.__version__[0]  # TF2
        m = dice_ml.Model(model=modelname, backend=backend)
        exp = dice_ml.Dice(d, m)

        # Generating CFs
        query_instance = dict(zip(feature_names, explore.queryvaluestouse))

        # Arguments shared by both weighting modes.
        cf_kwargs = dict(
            total_CFs=num_exp.value,
            desired_class="opposite",
            features_to_vary=f.useusing,
            proximity_weight=prox.value,
            diversity_weight=div.value,
        )

        weight_mode = f.weightdropdown.value
        if weight_mode == 'Choose Your Own Weights':
            # Pair each varied feature with the weight the user picked.
            cf_kwargs['feature_weights'] = dict(
                zip(f.useusing, f.weightvaluestouse))
        elif weight_mode != 'Use Default Weights':
            raise ValueError(
                "Unexpected weight option: {!r}".format(weight_mode))

        explore.dice_exp = exp.generate_counterfactuals(query_instance,
                                                        **cf_kwargs)
def _get_exp(self, backend, method="random"):
    """Build a DiCE explainer over the adult-income dataset.

    Parameters:
        backend: backend identifier used both to locate the model file and
            to construct the ``dice_ml.Model``.
        method: explainer method forwarded to ``dice_ml.Dice``.

    Returns:
        The ``dice_ml.Dice`` explainer.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_adult_income_dataset(),
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
    )
    return dice_ml.Dice(data_interface, model_interface, method=method)
def pyt_model_object():
    """Build a DiCE model wrapper for the adult-income model, 'PYT' backend.

    The model is loaded from the path the test helpers report for that
    backend, using the 'ohe-min-max' transform.
    """
    backend = 'PYT'
    return dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
        func='ohe-min-max',
    )
def __init__(self, mlmodel: MLModel, hyperparams: Optional[Dict] = None) -> None:
    """Set up the DiCE data, model and explainer objects for ``mlmodel``.

    Parameters:
        mlmodel: wrapped model; its ``backend`` must be "tensorflow" or
            "pytorch", and its ``data`` attribute supplies the dataframe,
            continuous/categorical feature names and the target name.
        hyperparams: optional overrides merged with
            ``self._DEFAULT_HYPERPARAMS``.

    Raises:
        ValueError: if ``mlmodel.backend`` is not a supported backend.
    """
    supported_backends = ["tensorflow", "pytorch"]
    if mlmodel.backend not in supported_backends:
        raise ValueError(
            f"{mlmodel.backend} is not in supported backends {supported_backends}"
        )

    super().__init__(mlmodel)

    data = mlmodel.data
    self._continuous = data.continuous
    self._categorical = data.categorical
    self._target = data.target
    self._model = mlmodel

    params = merge_default_parameters(hyperparams, self._DEFAULT_HYPERPARAMS)

    # Prepare data for dice data structure
    self._dice_data = dice_ml.Data(
        dataframe=data.df,
        continuous_features=self._continuous,
        outcome_name=self._target,
    )
    # The wrapped model is always handed to DiCE through its "sklearn" backend.
    self._dice_model = dice_ml.Model(model=mlmodel, backend="sklearn")
    self._dice = dice_ml.Dice(self._dice_data, self._dice_model, method="random")

    self._num = params["num"]
    self._desired_class = params["desired_class"]
    self._post_hoc_sparsity_param = params["posthoc_sparsity_param"]
def test_numeric_categories(self, desired_range, method, create_boston_data):
    """Generate regression counterfactuals when 'CHAS' is not declared continuous.

    Fits a small random-forest regressor, declares every feature except
    'CHAS' as continuous, and checks that ``generate_counterfactuals``
    returns a result for the first test row.

    Parameters:
        desired_range: target outcome range forwarded to DiCE.
        method: explainer method forwarded to ``dice_ml.Dice``.
        create_boston_data: fixture yielding
            ``(x_train, x_test, y_train, y_test, feature_names)``.
    """
    x_train, x_test, y_train, _y_test, feature_names = create_boston_data

    regressor = RandomForestRegressor(n_estimators=10, max_depth=4,
                                      random_state=777)
    model = regressor.fit(x_train, y_train)

    dataset_train = x_train.copy()
    dataset_train['Outcome'] = y_train

    # Build a filtered copy rather than calling feature_names.remove('CHAS'):
    # mutating the list owned by the fixture would leak into other tests (or
    # later parametrized runs) if the fixture is shared across them.
    continuous_features = [name for name in feature_names if name != 'CHAS']

    d = dice_ml.Data(dataframe=dataset_train,
                     continuous_features=continuous_features,
                     outcome_name='Outcome')
    m = dice_ml.Model(model=model, backend='sklearn', model_type='regressor')
    exp = dice_ml.Dice(d, m, method=method)

    cf_explanation = exp.generate_counterfactuals(
        query_instances=x_test.iloc[0:1],
        total_CFs=10,
        desired_range=desired_range)

    assert cf_explanation is not None
def test_base_model_classification(self, create_iris_data):
    """Exercise the sklearn-backed DiCE model wrapper with a classifier.

    Verifies that ``get_output`` returns one probability column per class,
    that ``get_output(model_score=False)`` returns one prediction per row
    whose distinct values match those in ``y_test``, that ``get_gradient``
    is not implemented, and that ``get_num_output_nodes2`` reports the
    number of classes.
    """
    x_train, x_test, y_train, y_test, _feature_names, classes = create_iris_data
    trained_model = self.create_sklearn_random_forest_classifier(
        x_train, y_train)

    diceml_model = dice_ml.Model(model=trained_model, backend='sklearn')
    # Check before first use so a failure points at construction.
    assert diceml_model is not None
    diceml_model.transformer.initialize_transform_func()

    prediction_probabilities = diceml_model.get_output(x_test)
    assert prediction_probabilities.shape[0] == x_test.shape[0]
    assert prediction_probabilities.shape[1] == len(classes)

    predictions = diceml_model.get_output(
        x_test, model_score=False).reshape(-1, 1)
    assert predictions.shape[0] == x_test.shape[0]
    assert predictions.shape[1] == 1
    # array_equal compares shape as well as values, so a differing number of
    # distinct labels is a clean assertion failure instead of a broadcasting
    # error or an accidental broadcast comparison.
    assert np.array_equal(np.unique(predictions), np.unique(y_test))

    with pytest.raises(NotImplementedError):
        diceml_model.get_gradient()

    assert diceml_model.get_num_output_nodes2(x_test) == len(classes)
def _get_exp(self, backend, method="random", is_public_data_interface=True):
    """Build a DiCE explainer for the adult-income model.

    Parameters:
        backend: backend identifier used both to locate the model file and
            to construct the ``dice_ml.Model``.
        method: explainer method forwarded to ``dice_ml.Dice``.
        is_public_data_interface: when true, the data object is built from
            the adult-income dataframe; otherwise it is built only from
            per-feature ranges and category lists.

    Returns:
        The ``dice_ml.Dice`` explainer.
    """
    if not is_public_data_interface:
        # Numeric features carry [min, max]; the others list their categories.
        feature_spec = {
            'age': [17, 90],
            'workclass': ['Government', 'Other/Unknown', 'Private',
                          'Self-Employed'],
            'education': ['Assoc', 'Bachelors', 'Doctorate', 'HS-grad',
                          'Masters', 'Prof-school', 'School', 'Some-college'],
            'marital_status': ['Divorced', 'Married', 'Separated', 'Single',
                               'Widowed'],
            'occupation': ['Blue-Collar', 'Other/Unknown', 'Professional',
                           'Sales', 'Service', 'White-Collar'],
            'race': ['Other', 'White'],
            'gender': ['Female', 'Male'],
            'hours_per_week': [1, 99],
        }
        data_interface = dice_ml.Data(features=feature_spec,
                                      outcome_name='income')
    else:
        data_interface = dice_ml.Data(
            dataframe=helpers.load_adult_income_dataset(),
            continuous_features=['age', 'hours_per_week'],
            outcome_name='income')

    model_path = helpers.get_adult_income_modelpath(backend=backend)
    model_interface = dice_ml.Model(model_path=model_path, backend=backend)
    return dice_ml.Dice(data_interface, model_interface, method=method)
def regression_exp_object(method="random"):
    """Build a DiCE explainer for the custom regression test dataset.

    Parameters:
        method: explainer method forwarded to ``dice_ml.Dice``.

    Returns:
        A ``dice_ml.Dice`` explainer over an sklearn 'regressor' model loaded
        from the helper-provided pipeline path.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_custom_testing_dataset_regression(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_regression(),
        backend='sklearn',
        model_type='regressor',
    )
    return dice_ml.Dice(data_interface, model_interface, method=method)
def binary_classification_exp_object_out_of_order(method="random"):
    """Build a DiCE explainer for the binary-classification pipeline model.

    The data comes from ``helpers.load_outcome_not_last_column_dataset()``
    (per the helper's name, a dataset whose outcome is not the last column).

    Parameters:
        method: explainer method forwarded to ``dice_ml.Dice``.

    Returns:
        The ``dice_ml.Dice`` explainer.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_outcome_not_last_column_dataset(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_binary(),
        backend='sklearn',
    )
    return dice_ml.Dice(data_interface, model_interface, method=method)
def pyt_exp_object():
    """Build a DiCE explainer for the adult-income model, 'PYT' backend.

    Returns:
        A ``dice_ml.Dice`` explainer built with the default method.
    """
    backend = 'PYT'
    data_interface = dice_ml.Data(
        dataframe=helpers.load_adult_income_dataset(),
        continuous_features=['age', 'hours_per_week'],
        outcome_name='income',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
    )
    return dice_ml.Dice(data_interface, model_interface)
def random_binary_classification_exp_object():
    """Build a 'random'-method DiCE explainer for the custom test dataset.

    Returns:
        A ``dice_ml.Dice`` explainer over the sklearn pipeline model loaded
        from the helper-provided path.
    """
    data_interface = dice_ml.Data(
        dataframe=helpers.load_custom_testing_dataset(),
        continuous_features=['Numerical'],
        outcome_name='Outcome',
    )
    model_interface = dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline(),
        backend='sklearn',
    )
    return dice_ml.Dice(data_interface, model_interface, method='random')
def test_model_initiation_fullpath():
    """Check model creation when ``backend`` is a dict of class paths.

    The backend names the PyTorch model-interface class and the PyTorch
    explainer class explicitly; the resulting object must be a
    ``PyTorchModel``. The test is skipped when ``torch`` cannot be imported.
    """
    pytest.importorskip("torch")

    backend = {
        'model': 'pytorch_model.PyTorchModel',
        'explainer': 'dice_pytorch.DicePyTorch',
    }
    model_path = helpers.get_adult_income_modelpath(backend=backend)
    model_interface = dice_ml.Model(model_path=model_path, backend=backend)

    expected_cls = dice_ml.model_interfaces.pytorch_model.PyTorchModel
    assert isinstance(model_interface, expected_cls)
def _create_diceml_explainer(self, method, continuous_features):
    """Create a DiCE explainer for this object's training data and model.

    Parameters:
        method: explainer method forwarded to ``Dice``.
        continuous_features: names of the continuous feature columns.

    Returns:
        A ``Dice`` explainer over ``self._train`` / ``self._model``, using
        the sklearn backend and a model type chosen from ``self._task_type``.
    """
    # Classification tasks map to the classifier type; anything else is
    # treated as a regressor.
    if self._task_type == ModelTask.CLASSIFICATION:
        model_type = CounterfactualConstants.CLASSIFIER
    else:
        model_type = CounterfactualConstants.REGRESSOR

    data_interface = dice_ml.Data(dataframe=self._train,
                                  continuous_features=continuous_features,
                                  outcome_name=self._target_column)
    model_interface = dice_ml.Model(model=self._model,
                                    backend=CounterfactualConstants.SKLEARN,
                                    model_type=model_type)
    return Dice(data_interface, model_interface, method=method)
def test_model_initiation_fullpath():
    """Check model creation when ``backend`` is a dict of class paths.

    The backend names the Keras/TensorFlow model-interface class and the
    explainer class for the installed TensorFlow major version; the
    resulting object must be a ``KerasTensorFlowModel``.
    """
    tf_version = tf.__version__[0]
    explainer_path = 'dice_tensorflow{0}.DiceTensorFlow{0}'.format(tf_version)
    backend = {
        'model': 'keras_tensorflow_model.KerasTensorFlowModel',
        'explainer': explainer_path,
    }

    model_path = helpers.get_adult_income_modelpath(backend=backend)
    model_interface = dice_ml.Model(model_path=model_path, backend=backend)

    expected_cls = (
        dice_ml.model_interfaces.keras_tensorflow_model.KerasTensorFlowModel)
    assert isinstance(model_interface, expected_cls)
def get_explainer_object(model_path, model_backend, data_object):
    """
    Build a DiCE explainer from a saved model and a DiCE data object.

    Parameters:
    model_path: path to the trained model, forwarded to dice_ml.Model
    model_backend: indicates the implementation type of DiCE we want to use.
    data_object: DiCE data object

    Returns:
    explainer (object): the dice_ml.Dice explainer built from the data
    object and the loaded model
    """
    return dice_ml.Dice(
        data_object,
        dice_ml.Model(model_path=model_path, backend=model_backend),
    )
def get_counterfactual(self, data_rows, y, ds):
    """Generate one DiCE counterfactual for each selected row of ``ds``.

    Parameters:
        data_rows: positional row selector applied to the feature frame via
            ``.iloc`` (presumably a list or slice of row positions --
            confirm against callers).
        y: currently unused; it is overwritten by the labels from ``ds``.
        ds: dataset object whose ``pandas()`` method returns ``(X, y)``.

    Returns:
        A dataframe concatenating the ``final_cfs_df`` of every explained
        row, with the index reset.
    """
    # TODO: What about y?
    # - I think the model is called on X again, so no need to pass prediction in again?
    X, y = ds.pandas()

    # DiCE needs the outcome column inside the dataframe it is given, so pass
    # features and labels together (previously only X was passed and the
    # concatenated frame was built but never used).
    df = pd.concat((X, y), axis=1)
    d = dice_ml.Data(dataframe=df, continuous_features=continous_columns,
                     outcome_name='income')

    backend = 'PYT'
    m = dice_ml.Model(model=self, backend=backend)
    exp = dice_ml.Dice(d, m)

    # 'records' is the full orient name; the abbreviation 'record' is
    # rejected by pandas >= 2.0.
    instances = X.iloc[data_rows].to_dict(orient='records')

    cf_options = dict(
        total_CFs=1,
        desired_class="opposite",
        proximity_weight=0.5,
        diversity_weight=1,
        categorical_penalty=0.1,
        algorithm="DiverseCF",
        features_to_vary="all",
        yloss_type="hinge_loss",
        diversity_loss_type="dpp_style:inverse_dist",
        feature_weights="inverse_mad",
        optimizer="pytorch:adam",
        learning_rate=0.05,
        min_iter=500,
        max_iter=5000,
        project_iter=0,
        loss_diff_thres=1e-5,
        loss_converge_maxiter=1,
        verbose=False,
        init_near_query_instance=True,
        tie_random=False,
        stopping_threshold=0.5,
        posthoc_sparsity_param=0.1,
        posthoc_sparsity_algorithm="binary",
    )

    res = [
        exp.generate_counterfactuals(instance, **cf_options).final_cfs_df
        for instance in instances
    ]
    return pd.concat(res).reset_index()
def generate_CF(instance):
    """Generate a single DiCE counterfactual for the first row of ``instance``.

    Parameters:
        instance: dataframe of query rows; only the first row is explained.

    Returns:
        The ``final_cfs_df`` dataframe of the generated counterfactual.
    """
    X, y = Adult('dataset', train=True).pandas()
    ds = pd.concat((X, y), axis=1)
    d = dice_ml.Data(dataframe=ds, continuous_features=continous_columns,
                     outcome_name='income')

    backend = 'PYT'
    # NOTE(review): a fresh FcNet() is created here; presumably its
    # constructor loads trained weights -- confirm.
    model = FcNet()
    m = dice_ml.Model(model=model, backend=backend)
    exp = dice_ml.Dice(d, m)

    # 'records' is the full orient name; the abbreviation 'record' is
    # rejected by pandas >= 2.0.
    instance = pd.DataFrame.to_dict(instance, orient='records')

    dice_exp = exp.generate_counterfactuals(
        instance[0],
        total_CFs=1,
        desired_class="opposite",
        proximity_weight=0.5,
        diversity_weight=1,
        categorical_penalty=0.1,
        algorithm="DiverseCF",
        features_to_vary="all",
        yloss_type="hinge_loss",
        diversity_loss_type="dpp_style:inverse_dist",
        feature_weights="inverse_mad",
        optimizer="pytorch:adam",
        learning_rate=0.05,
        min_iter=500,
        max_iter=1000,
        project_iter=0,
        loss_diff_thres=1e-5,
        loss_converge_maxiter=1,
        verbose=False,
        init_near_query_instance=True,
        tie_random=False,
        stopping_threshold=0.5,
        posthoc_sparsity_param=0.1,
        posthoc_sparsity_algorithm="binary",
    )
    return dice_exp.final_cfs_df
def sklearn_multiclass_classification_model_interface():
    """Build the sklearn 'classifier' model wrapper for the multiclass pipeline.

    The model is loaded from the helper-provided multiclass pipeline path.
    """
    return dice_ml.Model(
        model_path=helpers.get_custom_dataset_modelpath_pipeline_multiclass(),
        backend='sklearn',
        model_type='classifier',
    )
def sklearn_regression_model_interface():
    """Build the sklearn regression model wrapper for the regression pipeline.

    The model is loaded from the helper-provided regression pipeline path.

    Returns:
        The ``dice_ml.Model`` wrapper created with ``model_type='regressor'``.
    """
    ML_modelpath = helpers.get_custom_dataset_modelpath_pipeline_regression()
    # 'regressor' (not 'regression') is the model_type spelling used for
    # regression models elsewhere; unrecognised values are rejected by
    # dice_ml.Model's user-config validation.
    m = dice_ml.Model(model_path=ML_modelpath, backend='sklearn',
                      model_type='regressor')
    return m
# Score the fitted model `rf` on the held-out split.
y_pred = rf.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Create diverse counterfactual explanations
# Requires the DiCE package: pip install dice-ml
import dice_ml

# Dataset: DiCE data object built from the loader's dataframe.
data_dice = dice_ml.Data(
    dataframe=data_loader.data,
    # For perturbation strategy
    continuous_features=['age', 'avg_glucose_level', 'bmi'],
    outcome_name='stroke')

# Model: DiCE model object around the fitted `rf`.
rf_dice = dice_ml.Model(
    model=rf,
    # There exist backends for tf, torch, ...
    backend="sklearn")

# Explainer combining the data and model objects.
explainer = dice_ml.Dice(
    data_dice,
    rf_dice,
    # Random sampling, genetic algorithm, kd-tree,...
    method="random")

# %% Create explanation
# Generate CF based on the blackbox model, for the first test row only.
input_datapoint = X_test[0:1]
cf = explainer.generate_counterfactuals(input_datapoint,
                                        total_CFs=3,
                                        desired_class="opposite")

# Visualize it, showing only the feature values that changed.
cf.visualize_as_dataframe(show_only_changes=True)
def _get_model(self, backend):
    """Load the adult-income model for ``backend`` as a DiCE model wrapper.

    Parameters:
        backend: backend identifier used both to locate the model file and
            to construct the ``dice_ml.Model``.

    Returns:
        The ``dice_ml.Model`` wrapper.
    """
    return dice_ml.Model(
        model_path=helpers.get_adult_income_modelpath(backend=backend),
        backend=backend,
    )
def test_model_user_validation_no_valid_model(self):
    """``dice_ml.Model`` without a model or a model path must raise ``ValueError``."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        dice_ml.Model(backend='sklearn')