Example no. 1
    def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing generalized odds using true and
        predicted scores, while preserving calibration.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true `labels`.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                `scores`.

        Returns:
            CalibratedEqOddsPostprocessing: Returns self.
        """

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)

        cm = ClassificationMetric(dataset_true, dataset_pred,
                                  unprivileged_groups=self.unprivileged_groups,
                                  privileged_groups=self.privileged_groups)
        self.base_rate_priv = cm.base_rate(privileged=True)
        self.base_rate_unpriv = cm.base_rate(privileged=False)

        # Create a dataset with "trivial" predictions: every member of a
        # group is scored with that group's base rate.  Reuse the base
        # rates stored above rather than recomputing them.
        dataset_trivial = dataset_pred.copy(deepcopy=True)
        dataset_trivial.scores[cond_vec_priv] = self.base_rate_priv
        dataset_trivial.scores[cond_vec_unpriv] = self.base_rate_unpriv
        cm_triv = ClassificationMetric(dataset_true, dataset_trivial,
            unprivileged_groups=self.unprivileged_groups,
            privileged_groups=self.privileged_groups)

        def _group_cost(metric, privileged):
            # Cost of one group's predictions under the configured fp/fn
            # trade-off: pure generalized FPR when fn_rate == 0, pure
            # generalized FNR when fp_rate == 0, otherwise the weighted
            # combination of both.
            if self.fn_rate == 0:
                return metric.generalized_false_positive_rate(
                    privileged=privileged)
            if self.fp_rate == 0:
                return metric.generalized_false_negative_rate(
                    privileged=privileged)
            return weighted_cost(self.fp_rate, self.fn_rate, metric,
                                 privileged=privileged)

        priv_cost = _group_cost(cm, True)
        unpriv_cost = _group_cost(cm, False)
        priv_trivial_cost = _group_cost(cm_triv, True)
        unpriv_trivial_cost = _group_cost(cm_triv, False)

        # Mix the cheaper group's predictions toward the trivial predictor
        # just enough to equalize the two groups' costs.
        # NOTE(review): assumes the trivial cost differs from the model
        # cost for the group being mixed; otherwise this divides by zero.
        unpriv_costs_more = unpriv_cost > priv_cost
        self.priv_mix_rate = ((unpriv_cost - priv_cost)
                              / (priv_trivial_cost - priv_cost)
                              if unpriv_costs_more else 0)
        self.unpriv_mix_rate = (0 if unpriv_costs_more else
                                (priv_cost - unpriv_cost)
                                / (unpriv_trivial_cost - unpriv_cost))

        return self
    def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing odds using true and predicted
        labels.

        Sets up and solves a linear program whose four decision variables
        are per-group label-flip probabilities, then stores the solver
        result in ``self.model_params``.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true labels.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                labels.

        Returns:
            EqOddsPostprocessing: Returns self.
        """
        metric = ClassificationMetric(
            dataset_true,
            dataset_pred,
            unprivileged_groups=self.unprivileged_groups,
            privileged_groups=self.privileged_groups)

        # compute basic statistics
        # Naming convention below: "s"/suffix 0 = privileged group,
        # "o"/suffix 1 = unprivileged group.
        sbr = metric.base_rate(privileged=True)
        obr = metric.base_rate(privileged=False)

        fpr0 = metric.false_positive_rate(privileged=True)
        fpr1 = metric.false_positive_rate(privileged=False)
        fnr0 = metric.false_negative_rate(privileged=True)
        fnr1 = metric.false_negative_rate(privileged=False)
        tpr0 = metric.true_positive_rate(privileged=True)
        tpr1 = metric.true_positive_rate(privileged=False)
        tnr0 = metric.true_negative_rate(privileged=True)
        tnr1 = metric.true_negative_rate(privileged=False)

        # linear program has 4 decision variables:
        # [Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 1];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 1]]
        # Coefficients of the linear objective function to be minimized.
        c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

        # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
        # of the upper-bound inequality constraints at x
        # b_ub - 1-D array of values representing the upper-bound of each
        # inequality constraint (row) in A_ub.
        # Just to keep these between zero and one
        A_ub = np.array(
            [[1, 0, 0, 0], [-1, 0, 0, 0], [0, 1, 0, 0], [0, -1, 0, 0],
             [0, 0, 1, 0], [0, 0, -1, 0], [0, 0, 0, 1], [0, 0, 0, -1]],
            dtype=np.float64)
        b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names, self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names, self.unprivileged_groups)

        # Per-sample indicators over each group's predicted labels:
        # *const = predicted favorable (label kept constant when the
        # corresponding flip probability is 0), *flip = predicted
        # unfavorable (candidate for flipping to favorable).
        sconst = np.ravel(
            dataset_pred.labels[cond_vec_priv] == dataset_pred.favorable_label)
        sflip = np.ravel(dataset_pred.labels[cond_vec_priv] ==
                         dataset_pred.unfavorable_label)
        oconst = np.ravel(dataset_pred.labels[cond_vec_unpriv] ==
                          dataset_pred.favorable_label)
        oflip = np.ravel(dataset_pred.labels[cond_vec_unpriv] ==
                         dataset_pred.unfavorable_label)

        y_true = dataset_true.labels.ravel()

        # Confusion-cell indicator vectors (float64 0/1 via the ufunc
        # dtype argument) for the privileged group: tn/fn over predicted-
        # unfavorable samples, fp/tp over predicted-favorable samples.
        sm_tn = np.logical_and(
            sflip,
            y_true[cond_vec_priv] == dataset_true.unfavorable_label,
            dtype=np.float64)
        sm_fn = np.logical_and(
            sflip,
            y_true[cond_vec_priv] == dataset_true.favorable_label,
            dtype=np.float64)
        sm_fp = np.logical_and(
            sconst,
            y_true[cond_vec_priv] == dataset_true.unfavorable_label,
            dtype=np.float64)
        sm_tp = np.logical_and(
            sconst,
            y_true[cond_vec_priv] == dataset_true.favorable_label,
            dtype=np.float64)

        # Same four confusion-cell indicators for the unprivileged group.
        om_tn = np.logical_and(
            oflip,
            y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
            dtype=np.float64)
        om_fn = np.logical_and(
            oflip,
            y_true[cond_vec_unpriv] == dataset_true.favorable_label,
            dtype=np.float64)
        om_fp = np.logical_and(
            oconst,
            y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
            dtype=np.float64)
        om_tp = np.logical_and(
            oconst,
            y_true[cond_vec_unpriv] == dataset_true.favorable_label,
            dtype=np.float64)

        # A_eq - 2-D array which, when matrix-multiplied by x,
        # gives the values of the equality constraints at x
        # b_eq - 1-D array of values representing the RHS of each equality
        # constraint (row) in A_eq.
        # Used to impose equality of odds constraint
        # Row 1 equalizes the base-rate-normalized (positive-class) terms
        # across groups; row 2 the (1 - base-rate)-normalized (negative-
        # class) terms.  Each mean(mask * cell) is an empirical
        # probability over that group's samples.
        A_eq = [
            [(np.mean(sconst * sm_tp) - np.mean(sflip * sm_tp)) / sbr,
             (np.mean(sflip * sm_fn) - np.mean(sconst * sm_fn)) / sbr,
             (np.mean(oflip * om_tp) - np.mean(oconst * om_tp)) / obr,
             (np.mean(oconst * om_fn) - np.mean(oflip * om_fn)) / obr],
            [(np.mean(sconst * sm_fp) - np.mean(sflip * sm_fp)) / (1 - sbr),
             (np.mean(sflip * sm_tn) - np.mean(sconst * sm_tn)) / (1 - sbr),
             (np.mean(oflip * om_fp) - np.mean(oconst * om_fp)) / (1 - obr),
             (np.mean(oconst * om_tn) - np.mean(oflip * om_tn)) / (1 - obr)]
        ]

        b_eq = [
            (np.mean(oflip * om_tp) + np.mean(oconst * om_fn)) / obr -
            (np.mean(sflip * sm_tp) + np.mean(sconst * sm_fn)) / sbr,
            (np.mean(oflip * om_fp) + np.mean(oconst * om_tn)) / (1 - obr) -
            (np.mean(sflip * sm_fp) + np.mean(sconst * sm_tn)) / (1 - sbr)
        ]

        # Linear program
        # Stores the full linprog result object (solution vector plus
        # solver status) for later use.
        self.model_params = linprog(c,
                                    A_ub=A_ub,
                                    b_ub=b_ub,
                                    A_eq=A_eq,
                                    b_eq=b_eq)

        return self