def test_redacting_pd_num(self):
    """Check a ``Redacting >> LogisticRegression`` pipeline on ``creditg_pd_num``.

    The fairness info is read from the ``creditg_pd_num`` fixture and is
    used both to configure ``Redacting`` and as the first argument of the
    shared ``_attempt_remi_creditg_pd_num`` helper.
    """
    info = self.creditg_pd_num["fairness_info"]
    pipeline = Redacting(**info) >> LogisticRegression(max_iter=1000)
    # NOTE(review): 0.80 / 0.90 are forwarded unchanged to the helper;
    # presumably the accepted lower/upper bounds -- confirm against the helper.
    self._attempt_remi_creditg_pd_num(info, pipeline, 0.80, 0.90)
def fit(self, X, y):
    """Fit the redact-and-prepare pipeline, the encoders, and the mitigator.

    Steps, in order:

    1. Build ``Redacting`` (or ``NoOp`` when ``self.redact`` is falsy)
       followed by ``self.preparation`` and fit it on ``(X, y)``.
    2. Create the protected-attributes encoder and the pandas-to-dataset
       converter from the fairness configuration.
    3. Fit ``self.mitigator`` on the output of ``self._prep_and_encode``.
    4. Record every label seen in ``y`` that is not a favorable label.

    Args:
        X: Training features, passed through to the pipeline and encoder.
        y: Training labels; iterated to collect the observed label values.

    Returns:
        ``self``, now carrying the fitted components.
    """
    from lale.lib.aif360 import ProtectedAttributesEncoder, Redacting

    fairness_info = {
        "favorable_labels": self.favorable_labels,
        "protected_attributes": self.protected_attributes,
    }

    # Redaction is optional; NoOp keeps the pipeline shape uniform.
    if self.redact:
        first_step = Redacting(**fairness_info)
    else:
        first_step = lale.lib.lale.NoOp
    prep_pipeline = first_step >> self.preparation
    assert isinstance(prep_pipeline, lale.operators.TrainablePipeline)
    self.redact_and_prep = prep_pipeline.fit(X, y)

    self.prot_attr_enc = ProtectedAttributesEncoder(
        **fairness_info,
        remainder="drop",
        return_X_y=True,
    )
    self.pandas_to_dataset = _PandasToDatasetConverter(
        favorable_label=1,
        unfavorable_label=0,
        protected_attribute_names=[
            attr["feature"] for attr in self.protected_attributes
        ],
    )

    training_dataset = self._prep_and_encode(X, y)
    self.mitigator.fit(training_dataset)

    # Whatever appears in y but is not favorable counts as unfavorable.
    seen_labels = set(y)
    favorable = set(self.favorable_labels)
    self.unfavorable_labels = list(seen_labels - favorable)
    return self
def fit(self, X, y):
    """Fit the estimator pipeline, then fit the mitigator on its predictions.

    Steps, in order:

    1. Build ``Redacting`` (or ``NoOp`` when ``self.redact`` is falsy)
       followed by ``self.estimator`` and fit it on ``(X, y)``.
    2. Create the protected-attributes encoder and the pandas-to-dataset
       converter from the fairness configuration.
    3. Encode the true labels, and encode the fitted pipeline's
       predictions on ``X`` the same way.
    4. Fit ``self.mitigator`` on the (true, predicted) dataset pair.
    5. Record the observed classes and the labels that are not favorable.

    Args:
        X: Training features; ``X.index`` is used to align predictions.
        y: Training labels; iterated to collect the observed classes.

    Returns:
        ``self``, now carrying the fitted components.
    """
    from lale.lib.aif360 import ProtectedAttributesEncoder, Redacting

    fairness_info = {
        "favorable_labels": self.favorable_labels,
        "protected_attributes": self.protected_attributes,
        "unfavorable_labels": self.unfavorable_labels,
    }

    # Redaction is optional; NoOp keeps the pipeline shape uniform.
    if self.redact:
        first_step = Redacting(**fairness_info)
    else:
        first_step = lale.lib.lale.NoOp
    estim_pipeline = first_step >> self.estimator
    assert isinstance(estim_pipeline, TrainablePipeline)
    self.redact_and_estim = estim_pipeline.fit(X, y)

    self.prot_attr_enc = ProtectedAttributesEncoder(
        **fairness_info,
        remainder="drop",
        return_X_y=True,
    )
    self.pandas_to_dataset = _PandasToDatasetConverter(
        favorable_label=1,
        unfavorable_label=0,
        protected_attribute_names=[
            attr["feature"] for attr in self.protected_attributes
        ],
    )

    # Ground truth, run through the protected-attributes encoder.
    encoded_X, encoded_y = self.prot_attr_enc.transform(X, y)
    self.y_dtype = encoded_y.dtype
    self.y_name = encoded_y.name

    # Predictions on the training data, encoded the same way as y.
    raw_predictions = self.redact_and_estim.predict(X)
    prediction_series = _ndarray_to_series(raw_predictions, self.y_name, X.index)
    _, encoded_predictions = self.prot_attr_enc.transform(X, prediction_series)
    predicted_probas = self.redact_and_estim.predict_proba(X)

    dataset_true = self.pandas_to_dataset.convert(encoded_X, encoded_y)
    dataset_pred = self.pandas_to_dataset.convert(
        encoded_X, encoded_predictions, predicted_probas
    )
    self.mitigator = self.mitigator.fit(dataset_true, dataset_pred)

    # classes_ briefly holds a set so the difference below can be taken,
    # and is then converted to an array.
    self.classes_ = set(y)
    favorable = set(self.favorable_labels)
    self.not_favorable_labels = list(self.classes_ - favorable)
    self.classes_ = np.array(list(self.classes_))
    return self
def test_redacting_pd_cat(self):
    """Check ``Redacting`` ahead of a prep + LogisticRegression pipeline.

    The fairness info is read from the ``creditg_pd_cat`` fixture and is
    used both to configure ``Redacting`` and as the first argument of the
    shared ``_attempt_remi_creditg_pd_cat`` helper.
    """
    info = self.creditg_pd_cat["fairness_info"]
    # Preparation and classifier are grouped first, so redaction wraps the
    # already-composed estimator.
    prep_then_classify = self.prep_pd_cat >> LogisticRegression(max_iter=1000)
    redactor = Redacting(**info)
    pipeline = redactor >> prep_then_classify
    # NOTE(review): 0.81 / 0.91 are forwarded unchanged to the helper;
    # presumably the accepted lower/upper bounds -- confirm against the helper.
    self._attempt_remi_creditg_pd_cat(info, pipeline, 0.81, 0.91)