Ejemplo n.º 1
0
def get_metric_reports(true_dataset,classfied_dataset,privileged_groups,unprivileged_groups):
	"""Collect classification and fairness metrics into an ordered report.

	A deep copy of ``classfied_dataset`` is made and its ``labels`` are
	overwritten with a deep copy of ``true_dataset.labels``; that copy is
	passed as the reference ``dataset`` to ``ClassificationMetric`` and
	``classfied_dataset`` itself as the ``classified_dataset``.

	Args:
		true_dataset: Object whose ``labels`` attribute holds the true labels.
		classfied_dataset: Dataset holding the classifier's output; must
			support ``copy(deepcopy=True)``.
		privileged_groups: Forwarded unchanged to ``ClassificationMetric``.
		unprivileged_groups: Forwarded unchanged to ``ClassificationMetric``.

	Returns:
		OrderedDict: Metric name -> value, in this order: 'TPR', 'TNR',
		'FPR', 'FNR', 'Balanced_Acc', 'Acc', "Statistical parity difference",
		"Disparate impact", "Equal opportunity difference",
		"Average odds difference", "Theil index", "United Fairness".
	"""
	# Reference dataset: same contents as the classified one, true labels.
	reference=classfied_dataset.copy(deepcopy=True)
	reference.labels=copy.deepcopy(true_dataset.labels)

	metric=ClassificationMetric(
		dataset=reference,
		classified_dataset=classfied_dataset,
		unprivileged_groups=unprivileged_groups,
		privileged_groups=privileged_groups)

	# TPR and TNR are needed twice (reported and averaged), so fetch them first.
	tpr=metric.true_positive_rate()
	tnr=metric.true_negative_rate()

	return OrderedDict([
		('TPR',tpr),
		('TNR',tnr),
		('FPR',metric.false_positive_rate()),
		('FNR',metric.false_negative_rate()),
		# Balanced accuracy is the mean of TPR and TNR.
		('Balanced_Acc',0.5*(tpr+tnr)),
		('Acc',metric.accuracy()),
		("Statistical parity difference",metric.statistical_parity_difference()),
		("Disparate impact",metric.disparate_impact()),
		("Equal opportunity difference",metric.equal_opportunity_difference()),
		("Average odds difference",metric.average_odds_difference()),
		("Theil index",metric.theil_index()),
		# Reported under this label; the value is the generalized entropy index.
		("United Fairness",metric.generalized_entropy_index()),
	])
Ejemplo n.º 2
0
    def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing odds using true and predicted
        labels.

        Builds a linear program with four decision variables (see the comment
        above ``c`` below), solves it with ``linprog`` and stores the solver
        result in ``self.model_params``.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true labels.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                labels.

        Returns:
            EqOddsPostprocessing: Returns self.
        """
        def _joint(pred_mask, true_mask):
            # Elementwise AND returned as 0.0/1.0 floats. The cast is explicit
            # (instead of passing ``dtype=`` to the ufunc) so the result does
            # not depend on how NumPy resolves ``dtype=`` for a ufunc whose
            # loops produce boolean output.
            return np.logical_and(pred_mask, true_mask).astype(np.float64)

        metric = ClassificationMetric(
            dataset_true,
            dataset_pred,
            unprivileged_groups=self.unprivileged_groups,
            privileged_groups=self.privileged_groups)

        # compute basic statistics: share of all instances that fall in the
        # privileged (sbr) and unprivileged (obr) group
        n_all = metric.num_instances()
        sbr = metric.num_instances(privileged=True) / n_all
        obr = metric.num_instances(privileged=False) / n_all

        # suffix 0 = privileged group, suffix 1 = unprivileged group
        fpr0 = metric.false_positive_rate(privileged=True)
        fpr1 = metric.false_positive_rate(privileged=False)
        fnr0 = metric.false_negative_rate(privileged=True)
        fnr1 = metric.false_negative_rate(privileged=False)
        tpr0 = metric.true_positive_rate(privileged=True)
        tpr1 = metric.true_positive_rate(privileged=False)
        tnr0 = metric.true_negative_rate(privileged=True)
        tnr1 = metric.true_negative_rate(privileged=False)

        # linear program has 4 decision variables:
        # [Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 0];
        #  Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 1];
        #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 1]]
        # Coefficients of the linear objective function to be minimized.
        c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

        # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
        # of the upper-bound inequality constraints at x
        # b_ub - 1-D array of values representing the upper-bound of each
        # inequality constraint (row) in A_ub.
        # Just to keep these between zero and one
        A_ub = np.array(
            [[1, 0, 0, 0], [-1, 0, 0, 0], [0, 1, 0, 0], [0, -1, 0, 0],
             [0, 0, 1, 0], [0, 0, -1, 0], [0, 0, 0, 1], [0, 0, 0, -1]],
            dtype=np.float64)
        b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names, self.privileged_groups)[0]
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)[0]

        # Per-group masks over the predicted labels:
        # *const = predicted favorable, *flip = predicted unfavorable
        # (s* = privileged group, o* = unprivileged group).
        pred_priv = dataset_pred.labels[cond_vec_priv]
        pred_unpriv = dataset_pred.labels[cond_vec_unpriv]
        sconst = np.ravel(pred_priv == dataset_pred.favorable_label)
        sflip = np.ravel(pred_priv == dataset_pred.unfavorable_label)
        oconst = np.ravel(pred_unpriv == dataset_pred.favorable_label)
        oflip = np.ravel(pred_unpriv == dataset_pred.unfavorable_label)

        # Per-group masks over the true labels.
        y_true = dataset_true.labels.ravel()
        priv_true_fav = y_true[cond_vec_priv] == dataset_true.favorable_label
        priv_true_unfav = (
            y_true[cond_vec_priv] == dataset_true.unfavorable_label)
        unpriv_true_fav = (
            y_true[cond_vec_unpriv] == dataset_true.favorable_label)
        unpriv_true_unfav = (
            y_true[cond_vec_unpriv] == dataset_true.unfavorable_label)

        # Confusion-matrix cell indicators (as floats) for the privileged
        # group ...
        sm_tn = _joint(sflip, priv_true_unfav)
        sm_fn = _joint(sflip, priv_true_fav)
        sm_fp = _joint(sconst, priv_true_unfav)
        sm_tp = _joint(sconst, priv_true_fav)

        # ... and for the unprivileged group.
        om_tn = _joint(oflip, unpriv_true_unfav)
        om_fn = _joint(oflip, unpriv_true_fav)
        om_fp = _joint(oconst, unpriv_true_unfav)
        om_tp = _joint(oconst, unpriv_true_fav)

        # A_eq - 2-D array which, when matrix-multiplied by x,
        # gives the values of the equality constraints at x
        # b_eq - 1-D array of values representing the RHS of each equality
        # constraint (row) in A_eq.
        # Used to impose equality of odds constraint
        A_eq = [
            [(np.mean(sconst * sm_tp) - np.mean(sflip * sm_tp)) / sbr,
             (np.mean(sflip * sm_fn) - np.mean(sconst * sm_fn)) / sbr,
             (np.mean(oflip * om_tp) - np.mean(oconst * om_tp)) / obr,
             (np.mean(oconst * om_fn) - np.mean(oflip * om_fn)) / obr],
            [(np.mean(sconst * sm_fp) - np.mean(sflip * sm_fp)) / (1 - sbr),
             (np.mean(sflip * sm_tn) - np.mean(sconst * sm_tn)) / (1 - sbr),
             (np.mean(oflip * om_fp) - np.mean(oconst * om_fp)) / (1 - obr),
             (np.mean(oconst * om_tn) - np.mean(oflip * om_tn)) / (1 - obr)]
        ]

        b_eq = [
            (np.mean(oflip * om_tp) + np.mean(oconst * om_fn)) / obr -
            (np.mean(sflip * sm_tp) + np.mean(sconst * sm_fn)) / sbr,
            (np.mean(oflip * om_fp) + np.mean(oconst * om_tn)) / (1 - obr) -
            (np.mean(sflip * sm_fp) + np.mean(sconst * sm_tn)) / (1 - sbr)
        ]

        # Linear program
        self.model_params = linprog(c,
                                    A_ub=A_ub,
                                    b_ub=b_ub,
                                    A_eq=A_eq,
                                    b_eq=b_eq)

        return self
        dataset_transf_valid_pred = cpp.predict(dataset_orig_valid_pred)
        dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred)

        cm_transf_valid = ClassificationMetric(dataset_orig_valid, dataset_transf_valid_pred,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)

        cm_transf_test = ClassificationMetric(dataset_orig_test, dataset_transf_test_pred,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)

        #cm_transf_test.difference
        
        for idx,PR in enumerate(privileged_options):
            gfnr[idx] += cm_transf_test.false_negative_rate(privileged=PR)
            gfpr[idx] += cm_transf_test.false_positive_rate(privileged=PR)
            result = cm_transf_test.accuracy(privileged=PR)
            acc[idx] += float(result)

        pbar.update(1)

    fns.append(gfnr/N_reps)
    fps.append(gfpr/N_reps)
    accs.append(acc/N_reps)
    negs.append(neg)


def collapse(param, idx):
    """Return a list holding element ``idx`` of every entry in ``param``.

    Args:
        param: Iterable of indexable items.
        idx: Index (or key) looked up in each item.

    Returns:
        list: ``[v[idx] for v in param]``, in iteration order.
    """
    return [v[idx] for v in param]


# Display names keyed by None / True / False; presumably the ``privileged``
# selector passed to the metric calls -- TODO confirm against the caller.
getnames = {None:"full data", True:"privileged", False:"unprivileged"}