def fit(self, dataset_true, dataset_pred):
    """Compute parameters for equalizing generalized odds using true and
    predicted scores, while preserving calibration.

    Args:
        dataset_true (BinaryLabelDataset): Dataset containing true `labels`.
        dataset_pred (BinaryLabelDataset): Dataset containing predicted
            `scores`.

    Returns:
        CalibratedEqOddsPostprocessing: Returns self.
    """
    # Boolean masks selecting the privileged / unprivileged rows of the
    # prediction dataset.
    cond_vec_priv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.privileged_groups)
    cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.unprivileged_groups)

    cm = ClassificationMetric(dataset_true, dataset_pred,
                              unprivileged_groups=self.unprivileged_groups,
                              privileged_groups=self.privileged_groups)
    self.base_rate_priv = cm.base_rate(privileged=True)
    self.base_rate_unpriv = cm.base_rate(privileged=False)

    # Create a dataset with "trivial" predictions: every member of a group
    # receives that group's base rate as its score.  Reuse the base rates
    # stored above instead of recomputing them.
    dataset_trivial = dataset_pred.copy(deepcopy=True)
    dataset_trivial.scores[cond_vec_priv] = self.base_rate_priv
    dataset_trivial.scores[cond_vec_unpriv] = self.base_rate_unpriv

    cm_triv = ClassificationMetric(dataset_true, dataset_trivial,
                                   unprivileged_groups=self.unprivileged_groups,
                                   privileged_groups=self.privileged_groups)

    # Select the cost function: pure generalized FPR, pure generalized FNR,
    # or a weighted combination of the two.
    if self.fn_rate == 0:
        priv_cost = cm.generalized_false_positive_rate(privileged=True)
        unpriv_cost = cm.generalized_false_positive_rate(privileged=False)
        priv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=True)
        unpriv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=False)
    elif self.fp_rate == 0:
        priv_cost = cm.generalized_false_negative_rate(privileged=True)
        unpriv_cost = cm.generalized_false_negative_rate(privileged=False)
        priv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=True)
        unpriv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=False)
    else:
        priv_cost = weighted_cost(self.fp_rate, self.fn_rate, cm, privileged=True)
        unpriv_cost = weighted_cost(self.fp_rate, self.fn_rate, cm, privileged=False)
        priv_trivial_cost = weighted_cost(self.fp_rate, self.fn_rate, cm_triv, privileged=True)
        unpriv_trivial_cost = weighted_cost(self.fp_rate, self.fn_rate, cm_triv, privileged=False)

    # Only the group with the LOWER cost is mixed toward its trivial
    # predictions; the other group's mix rate stays 0.
    # NOTE(review): if a group's trivial cost equals its actual cost the
    # division below is by zero — presumably ruled out for valid inputs;
    # confirm against callers.
    unpriv_costs_more = unpriv_cost > priv_cost
    self.priv_mix_rate = ((unpriv_cost - priv_cost)
                          / (priv_trivial_cost - priv_cost)
                          if unpriv_costs_more else 0)
    self.unpriv_mix_rate = (0 if unpriv_costs_more
                            else (priv_cost - unpriv_cost)
                            / (unpriv_trivial_cost - unpriv_cost))

    return self
def fit(self, dataset_true, dataset_pred):
    """Compute parameters for equalizing odds using true and predicted
    labels.

    Args:
        dataset_true (BinaryLabelDataset): Dataset containing true labels.
        dataset_pred (BinaryLabelDataset): Dataset containing predicted
            labels.

    Returns:
        EqOddsPostprocessing: Returns self.
    """
    metric = ClassificationMetric(
        dataset_true, dataset_pred,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)

    # Compute basic statistics.
    sbr = metric.base_rate(privileged=True)
    obr = metric.base_rate(privileged=False)

    fpr0 = metric.false_positive_rate(privileged=True)
    fpr1 = metric.false_positive_rate(privileged=False)
    fnr0 = metric.false_negative_rate(privileged=True)
    fnr1 = metric.false_negative_rate(privileged=False)
    tpr0 = metric.true_positive_rate(privileged=True)
    tpr1 = metric.true_positive_rate(privileged=False)
    tnr0 = metric.true_negative_rate(privileged=True)
    tnr1 = metric.true_negative_rate(privileged=False)

    # Linear program has 4 decision variables:
    # [Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 0];
    #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 0];
    #  Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 1];
    #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 1]]

    # Coefficients of the linear objective function to be minimized.
    c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

    # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
    # of the upper-bound inequality constraints at x.
    # b_ub - 1-D array of values representing the upper-bound of each
    # inequality constraint (row) in A_ub.
    # Just to keep the decision variables between zero and one.
    A_ub = np.array([[ 1,  0,  0,  0],
                     [-1,  0,  0,  0],
                     [ 0,  1,  0,  0],
                     [ 0, -1,  0,  0],
                     [ 0,  0,  1,  0],
                     [ 0,  0, -1,  0],
                     [ 0,  0,  0,  1],
                     [ 0,  0,  0, -1]], dtype=np.float64)
    b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

    # Boolean masks selecting the privileged / unprivileged rows.
    cond_vec_priv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.privileged_groups)
    cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.unprivileged_groups)

    # Per-sample indicators: 'const' = predicted favorable (label kept if
    # the variable is 1), 'flip' = predicted unfavorable;
    # s* = privileged ("self") group, o* = unprivileged ("other") group.
    sconst = np.ravel(
        dataset_pred.labels[cond_vec_priv] == dataset_pred.favorable_label)
    sflip = np.ravel(
        dataset_pred.labels[cond_vec_priv] == dataset_pred.unfavorable_label)
    oconst = np.ravel(
        dataset_pred.labels[cond_vec_unpriv] == dataset_pred.favorable_label)
    oflip = np.ravel(
        dataset_pred.labels[cond_vec_unpriv] == dataset_pred.unfavorable_label)

    y_true = dataset_true.labels.ravel()

    # Confusion-cell indicator vectors per group (tn/fn/fp/tp).
    sm_tn = np.logical_and(sflip,
        y_true[cond_vec_priv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    sm_fn = np.logical_and(sflip,
        y_true[cond_vec_priv] == dataset_true.favorable_label,
        dtype=np.float64)
    sm_fp = np.logical_and(sconst,
        y_true[cond_vec_priv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    sm_tp = np.logical_and(sconst,
        y_true[cond_vec_priv] == dataset_true.favorable_label,
        dtype=np.float64)

    om_tn = np.logical_and(oflip,
        y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    om_fn = np.logical_and(oflip,
        y_true[cond_vec_unpriv] == dataset_true.favorable_label,
        dtype=np.float64)
    om_fp = np.logical_and(oconst,
        y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    om_tp = np.logical_and(oconst,
        y_true[cond_vec_unpriv] == dataset_true.favorable_label,
        dtype=np.float64)

    # Hoist each mean so it is computed once; the same terms are shared
    # between A_eq and b_eq below.  Suffix _c = weighted by the 'const'
    # indicator, _f = weighted by the 'flip' indicator.
    sm_tp_c = np.mean(sconst * sm_tp)
    sm_tp_f = np.mean(sflip * sm_tp)
    sm_fn_c = np.mean(sconst * sm_fn)
    sm_fn_f = np.mean(sflip * sm_fn)
    sm_fp_c = np.mean(sconst * sm_fp)
    sm_fp_f = np.mean(sflip * sm_fp)
    sm_tn_c = np.mean(sconst * sm_tn)
    sm_tn_f = np.mean(sflip * sm_tn)

    om_tp_c = np.mean(oconst * om_tp)
    om_tp_f = np.mean(oflip * om_tp)
    om_fn_c = np.mean(oconst * om_fn)
    om_fn_f = np.mean(oflip * om_fn)
    om_fp_c = np.mean(oconst * om_fp)
    om_fp_f = np.mean(oflip * om_fp)
    om_tn_c = np.mean(oconst * om_tn)
    om_tn_f = np.mean(oflip * om_tn)

    # A_eq - 2-D array which, when matrix-multiplied by x, gives the values
    # of the equality constraints at x.
    # b_eq - 1-D array of values representing the RHS of each equality
    # constraint (row) in A_eq.
    # Used to impose the equality-of-odds constraint (row 0: TPR parity,
    # row 1: FPR parity).
    A_eq = [
        [(sm_tp_c - sm_tp_f) / sbr,
         (sm_fn_f - sm_fn_c) / sbr,
         (om_tp_f - om_tp_c) / obr,
         (om_fn_c - om_fn_f) / obr],
        [(sm_fp_c - sm_fp_f) / (1 - sbr),
         (sm_tn_f - sm_tn_c) / (1 - sbr),
         (om_fp_f - om_fp_c) / (1 - obr),
         (om_tn_c - om_tn_f) / (1 - obr)],
    ]

    b_eq = [
        (om_tp_f + om_fn_c) / obr - (sm_tp_f + sm_fn_c) / sbr,
        (om_fp_f + om_tn_c) / (1 - obr) - (sm_fp_f + sm_tn_c) / (1 - sbr),
    ]

    # Solve the linear program.
    self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub,
                                A_eq=A_eq, b_eq=b_eq)

    return self