def test_argument_types(self, transformX, transformY, transformA):
    """Expanded-out version of one of the smoke tests.

    Fits an ExponentiatedGradient under DemographicParity and checks the
    recorded convergence statistics, disparity and error against known values.
    """
    expgrad = ExponentiatedGradient(
        self.learner, constraints=DemographicParity(), eps=0.1
    )
    expgrad.fit(
        transformX(self.X),
        transformY(self.y),
        sensitive_features=transformA(self.A),
    )

    results = expgrad._expgrad_result._as_dict()
    best_classifier = results["best_classifier"]
    results["n_classifiers"] = len(results["classifiers"])

    # Recompute disparity and error for the returned classifier on the
    # untransformed data.
    disparity_moment = DemographicParity()
    disparity_moment.load_data(self.X, self.y, sensitive_features=self.A)
    error_metric = ErrorRate()
    error_metric.load_data(self.X, self.y, sensitive_features=self.A)
    results["disp"] = disparity_moment.gamma(best_classifier).max()
    results["error"] = error_metric.gamma(best_classifier)[0]

    assert results["best_gap"] == pytest.approx(0.0000, abs=self._PRECISION)
    assert results["last_t"] == 5
    assert results["best_t"] == 5
    assert results["disp"] == pytest.approx(0.1, abs=self._PRECISION)
    assert results["error"] == pytest.approx(0.25, abs=self._PRECISION)
    assert results["n_oracle_calls"] == 32
    assert results["n_classifiers"] == 3
def test_argument_types(self, transformX, transformY, transformA, A_two_dim):
    """Expanded-out version of one of the smoke tests.

    Exercises ExponentiatedGradient via its underscore attributes and checks
    convergence statistics, disparity and error against known values.
    """
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)

    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(),
        eps=0.1,
    )
    expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A))

    best_classifier = expgrad._best_classifier
    classifier_count = len(expgrad._classifiers)

    # Recompute disparity and error for the best classifier, using the
    # sensitive features merged down to a single column.
    disparity_moment = DemographicParity()
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)
    disparity = disparity_moment.gamma(best_classifier).max()
    error_value = error_metric.gamma(best_classifier)[0]

    assert expgrad._best_gap == pytest.approx(0.0000, abs=_PRECISION)
    assert expgrad._last_t == 5
    assert expgrad._best_t == 5
    assert disparity == pytest.approx(0.1, abs=_PRECISION)
    assert error_value == pytest.approx(0.25, abs=_PRECISION)
    assert expgrad._n_oracle_calls == 32
    assert classifier_count == 3
def test_argument_types(self, transformX, transformY, transformA, A_two_dim):
    """Expanded-out version of one of the smoke tests.

    Uses the `_expgrad_result._as_dict()` view of the fit results and checks
    convergence statistics, disparity and error against known values.
    """
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)

    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(),
        eps=0.1,
    )
    expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A))

    results = expgrad._expgrad_result._as_dict()
    best_classifier = results["best_classifier"]
    results["n_classifiers"] = len(results["classifiers"])

    # Recompute disparity and error for the best classifier, using the
    # sensitive features merged down to a single column.
    disparity_moment = DemographicParity()
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)
    results["disp"] = disparity_moment.gamma(best_classifier).max()
    results["error"] = error_metric.gamma(best_classifier)[0]

    assert results["best_gap"] == pytest.approx(0.0000, abs=_PRECISION)
    assert results["last_t"] == 5
    assert results["best_t"] == 5
    assert results["disp"] == pytest.approx(0.1, abs=_PRECISION)
    assert results["error"] == pytest.approx(0.25, abs=_PRECISION)
    assert results["n_oracle_calls"] == 32
    assert results["n_classifiers"] == 3
def gridSearch(model, X_train, Y_train, A_train, grid_size):
    """Generate a sequence of relabellings and reweightings, and train a predictor for each.

    Only applicable for a binary sensitive feature.

    Parameters:
        model: the unmitigated algorithmic model
        X_train: input data for training
        Y_train: list of ground truths
        A_train: the (binary) sensitive feature used for mitigation
        grid_size: number of Lagrange-multiplier grid points swept by GridSearch

    Returns the list of predictors from the sweep that are NOT dominated in
    the error-disparity space by any other predictor.  (The previous
    docstring incorrectly said a dataframe of scores was returned.)
    """
    sweep = GridSearch(model, constraints=DemographicParity(), grid_size=grid_size)

    # We extract the full set of predictors from the `GridSearch` object.
    sweep.fit(X_train, Y_train, sensitive_features=A_train)
    predictors = sweep._predictors

    # Remove the predictors which are dominated in the error-disparity space
    # by others from the sweep (note that the disparity will only be
    # calculated for the protected attribute; other potentially protected
    # attributes will not be mitigated).
    # In general, one might not want to do this, since there may be other
    # considerations beyond the strict optimisation of error and disparity
    # (of the given protected attribute).
    errors, disparities = [], []
    for m in predictors:
        # `m.predict` is already the callable the moments evaluate; no need
        # to wrap it in an assigned lambda (PEP 8 E731).
        error = ErrorRate()
        error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train)
        disparity = DemographicParity()
        disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train)
        errors.append(error.gamma(m.predict)[0])
        disparities.append(disparity.gamma(m.predict).max())

    all_results = pd.DataFrame(
        {"predictor": predictors, "error": errors, "disparity": disparities}
    )

    # Keep a predictor only if no other predictor with equal-or-lower
    # disparity achieves strictly lower error.
    non_dominated = []
    for row in all_results.itertuples():
        errors_for_lower_or_eq_disparity = all_results["error"][
            all_results["disparity"] <= row.disparity
        ]
        if row.error <= errors_for_lower_or_eq_disparity.min():
            non_dominated.append(row.predictor)

    return non_dominated
def __remove_predictors_dominated_error_disparity_by_sweep(self, predictors, X_train, Y_train, A_train):
    """Score each predictor from the sweep on error and demographic disparity.

    Disparity is computed against the `diabetic` column of A_train only.
    Returns a DataFrame with one row per predictor: its error and disparity.
    """
    error_values = []
    disparity_values = []
    for predictor in predictors:
        def wrapped(X):
            return predictor.predict(X)

        error_metric = ErrorRate()
        error_metric.load_data(
            X_train, pd.Series(Y_train), sensitive_features=A_train.diabetic
        )
        disparity_metric = DemographicParity()
        disparity_metric.load_data(
            X_train, pd.Series(Y_train), sensitive_features=A_train.diabetic
        )
        error_values.append(error_metric.gamma(wrapped)[0])
        disparity_values.append(disparity_metric.gamma(wrapped).max())

    return pd.DataFrame(
        {
            "predictor": predictors,
            "error": error_values,
            "disparity": disparity_values,
        }
    )
def test_argument_types_ratio_bound(self, transformX, transformY, transformA, A_two_dim):
    """Expanded-out version of one of the smoke tests, using a ratio bound.

    Fits ExponentiatedGradient with a ratio-bounded DemographicParity
    constraint and checks convergence statistics, disparity and error.
    """
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)

    eps = 0.1
    ratio = 1.0
    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(ratio_bound_slack=eps, ratio_bound=ratio),
        eps=eps,
    )
    expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A))

    def pmf_one(X):
        # Probability of the positive class from the fitted ensemble.
        return expgrad._pmf_predict(X)[:, 1]

    predictor_count = len(expgrad.predictors_)

    disparity_moment = DemographicParity(ratio_bound_slack=eps, ratio_bound=ratio)
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)

    gamma_values = disparity_moment.gamma(pmf_one)
    disparity = disparity_moment.gamma(pmf_one).max()
    disp_minus_bound = disparity_moment.gamma(pmf_one) - disparity_moment.bound()
    error_value = error_metric.gamma(pmf_one)[0]

    assert expgrad.best_gap_ == pytest.approx(0.0000, abs=_PRECISION)
    assert expgrad.last_iter_ == 5
    assert expgrad.best_iter_ == 5
    assert disparity == pytest.approx(0.1, abs=_PRECISION)
    # The moment's bound() should equal the configured slack eps.
    assert np.all(np.isclose(gamma_values - eps, disp_minus_bound))
    assert error_value == pytest.approx(0.25, abs=_PRECISION)
    assert expgrad.n_oracle_calls_ == 32
    assert predictor_count == 3
def test_signed_weights():
    """Check DemographicParity.signed_weights against hand-computed values.

    Builds rigged threshold data with two groups, supplies a known lambda
    vector, and verifies the per-sample signed weights.
    """
    dp = DemographicParity()
    assert dp.short_name == "DemographicParity"

    group0_count = 10
    group1_count = 30
    total_count = group0_count + group1_count
    threshold_a0 = 0.2
    threshold_a1 = 0.7
    label_a0 = 0xDEAD
    label_a1 = 0xBEEF

    X, Y, A = simple_binary_threshold_data(
        group0_count, group1_count, threshold_a0, threshold_a1, label_a0, label_a1
    )

    # Load up the (rigged) data
    dp.load_data(X, Y, sensitive_features=A)

    multi_index = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], [label_a0, label_a1]],
        names=[_SIGN, _EVENT, _GROUP_ID],
    )
    lambda_vec = pd.Series([2000, 1000, 500, 100], index=multi_index, name=0)

    # Net lambda per group: positive minus negative component.
    net_lambda_a0 = 2000 - 500
    net_lambda_a1 = 1000 - 100
    lambda_total = net_lambda_a0 + net_lambda_a1

    weight_a0 = lambda_total - net_lambda_a0 * (total_count / group0_count)
    weight_a1 = lambda_total - net_lambda_a1 * (total_count / group1_count)
    expected = np.concatenate(
        (np.full(group0_count, weight_a0), np.full(group1_count, weight_a1)),
        axis=None,
    )

    assert np.array_equal(expected, dp.signed_weights(lambda_vec))
def test_construct_and_load():
    """Verify the internal state DemographicParity builds from load_data.

    Checks the tags frame, the sign/event/group MultiIndex, the event and
    group probabilities, and the positive/negative basis vectors.
    """
    dp = DemographicParity()
    assert dp.short_name == "DemographicParity"

    # Generate some (rigged) data
    group0_count = 10
    group1_count = 30
    total_count = group0_count + group1_count
    threshold_a0 = 0.2
    threshold_a1 = 0.7
    label_a0 = 2
    label_a1 = 3

    X, Y, A = simple_binary_threshold_data(
        group0_count, group1_count, threshold_a0, threshold_a1, label_a0, label_a1
    )

    # Load up the (rigged) data
    dp.load_data(X, Y, sensitive_features=A)
    assert dp.data_loaded
    assert dp.total_samples == group0_count + group1_count

    # The tags DF mirrors the inputs, with a single 'all' event throughout
    assert dp.tags['label'].equals(pd.Series(Y))
    assert dp.tags['group_id'].equals(pd.Series(A))
    assert dp.tags['event'].map(lambda x: x == 'all').all()

    # The index MultiIndex is the product of sign x event x group
    expected_index = pd.MultiIndex.from_product(
        [['+', '-'], ['all'], [2, 3]],
        names=[_SIGN, _EVENT, _GROUP_ID])
    assert dp.index.equals(expected_index)

    # prob_event: only the 'all' event, and everything belongs to it
    assert len(dp.prob_event.index) == 1
    assert dp.prob_event.loc['all'] == 1

    # prob_group_event: only an 'all' event, but it records the fraction
    # of each group in the population
    assert len(dp.prob_group_event.index) == 2
    assert dp.prob_group_event.loc[('all', label_a0)] == group0_count / total_count
    assert dp.prob_group_event.loc[('all', label_a1)] == group1_count / total_count

    # neg_basis: looks at the \lambda_{-} values and picks out the one
    # associated with the first group
    assert len(dp.neg_basis.index) == 4
    assert dp.neg_basis[0]['+', 'all', label_a0] == 0
    assert dp.neg_basis[0]['+', 'all', label_a1] == 0
    assert dp.neg_basis[0]['-', 'all', label_a0] == 1
    assert dp.neg_basis[0]['-', 'all', label_a1] == 0

    # pos_basis: looks at the \lambda_{+} values and picks out the one
    # associated with the first group
    assert len(dp.pos_basis.index) == 4
    assert dp.pos_basis[0]['+', 'all', label_a0] == 1
    assert dp.pos_basis[0]['+', 'all', label_a1] == 0
    assert dp.pos_basis[0]['-', 'all', label_a0] == 0
    assert dp.pos_basis[0]['-', 'all', label_a1] == 0

    # neg_basis_present
    assert len(dp.neg_basis_present) == 1
    assert dp.neg_basis_present[0]
# calculated for the sensitive feature; other potentially sensitive features will # not be mitigated). # In general, one might not want to do this, since there may be other considerations # beyond the strict optimization of error and disparity (of the given sensitive feature). errors, disparities = [], [] for m in predictors: def classifier(X): return m.predict(X) error = ErrorRate() error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) disparity = DemographicParity() disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) errors.append(error.gamma(classifier)[0]) disparities.append(disparity.gamma(classifier).max()) all_results = pd.DataFrame({ "predictor": predictors, "error": errors, "disparity": disparities }) non_dominated = [] for row in all_results.itertuples(): errors_for_lower_or_eq_disparity = all_results["error"][ all_results["disparity"] <= row.disparity]