def compute_metrics(model, X_test, y_test, X_train, y_train, dataset_test, 
                    unprivileged_groups, privileged_groups, protect_attribute, 
                    print_result):
    """
    Calculate and return: model accuracy and fairness metrics
    
    Parameters
    ----------
    model: scikit-learn classifier    
    X_test: numpy 2d array
    y_test: numpy 1d array
    X_train: numpy 2d array
    y_train: numpy 1d array
    dataset_test: aif360.datasets.BinaryLabelDataset
    unprivileged_groups: list<dict>
        Dictionary where the key is the name of the sensitive column in the 
        dataset, and the value is the value of the unprivileged group in the
        dataset
    privileged_groups: list<dict>
        Dictionary where the key is the name of the sensitive column in the 
        dataset, and the value is the value of the privileged group in the
        dataset
    protect_attribute
    print_result
    """
    result = {}
    
    y_pred_test = model.predict(X_test)
    result['acc_test'] = accuracy_score(y_true=y_test, y_pred=y_pred_test)
    y_pred_train = model.predict(X_train)
    result['acc_train'] = accuracy_score(y_true=y_train, y_pred=y_pred_train)
    
    dataset_pred = dataset_test.copy()
    dataset_pred.labels = y_pred_test

    bin_metric = BinaryLabelDatasetMetric(dataset_pred, 
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    result['disp_impact'] = bin_metric.disparate_impact()
    result['stat_parity'] = bin_metric.mean_difference()

    classif_metric = ClassificationMetric(dataset_test, dataset_pred, 
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    result['avg_odds'] = classif_metric.average_odds_difference()
    result['equal_opport'] = classif_metric.equal_opportunity_difference()
    result['false_discovery_rate'] = classif_metric.false_discovery_rate_difference()
    result['entropy_index'] = classif_metric.generalized_entropy_index()
    result['acc_test_clf'] = classif_metric.accuracy(privileged=None)
    result['acc_test_priv'] = classif_metric.accuracy(privileged=True)
    result['acc_test_unpriv'] = classif_metric.accuracy(privileged=False)
    
    result['consistency'] = consitency(X_test, y_pred_test, protect_attribute, n_neighbors=5)
    result['counterfactual'] = counterfactual(X_test, model, protect_attribute)
    
    if print_result:
        print("Train accuracy: ", result['acc_train'])
        print("Test accuracy: ", result['acc_test'])
        print("Test accuracy clf: ", result['acc_test_clf'])
        print("Test accuracy priv.: ", result['acc_test_priv'])
        print("Test accuracy unpriv.: ", result['acc_test_unpriv'])
        print('Disparate impact: ', result['disp_impact'])
        print('Mean difference: ', result['stat_parity'])
        print('Average odds difference:', result['avg_odds'])
        print('Equality of opportunity:', result['equal_opport'])
        print('False discovery rate difference:', result['false_discovery_rate'])
        print('Generalized entropy index:', result['entropy_index'])
        print('Consistency: ', result['consistency'])
        print('Counterfactual fairness: ', result['counterfactual'])

    return result
예제 #2
0
def compute_metrics(model, X_test, y_test, X_train, y_train, dataset_test,
                    dataset_name, model_name, unprivileged_groups,
                    privileged_groups, position):
    """
    Calculate and return: model accuracy and fairness metrics
    
    Parameters
    ----------
    model: scikit-learn classifier    
    X_test: numpy 2d array
    y_test: numpy 1d array
    X_train: numpy 2d array
    y_train: numpy 1d array
    dataset_test: aif360.datasets.BinaryLabelDataset
    dataset_name: string
        Dataset name used in the analysis
    model_name: string
    unprivileged_groups: list<dict>
        Dictionary where the key is the name of the sensitive column in the 
        dataset, and the value is the value of the unprivileged group in the
        dataset
    privileged_groups: list<dict>
        Dictionary where the key is the name of the sensitive column in the 
        dataset, and the value is the value of the privileged group in the
        dataset
    position: int
        Column position of the sensitive group in the dataset 
    """

    y_pred_test = model.predict(X_test)
    acc_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
    print("Test accuracy: ", acc_test)
    y_pred_train = model.predict(X_train)
    acc_train = accuracy_score(y_true=y_train, y_pred=y_pred_train)
    print("Train accuracy: ", acc_train)

    dataset_pred = dataset_test.copy()
    dataset_pred.labels = y_pred_test

    bin_metric = BinaryLabelDatasetMetric(
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    disparate_impact_bin = bin_metric.disparate_impact()
    print('Disparate impact: ', disparate_impact_bin)
    mean_difference = bin_metric.mean_difference()
    print('Mean difference: ', mean_difference)

    classif_metric = ClassificationMetric(
        dataset_test,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    classif_disparete_impact = classif_metric.disparate_impact()
    avg_odds = classif_metric.average_odds_difference()
    print('Average odds difference:', avg_odds)
    equal_opport = classif_metric.equal_opportunity_difference()
    print('Equality of opportunity:', equal_opport)
    false_discovery_rate = classif_metric.false_discovery_rate_difference()
    print('False discovery rate difference:', false_discovery_rate)
    entropy_index = classif_metric.generalized_entropy_index()
    print('Generalized entropy index:', entropy_index)

    cons_comp = consitency_mod(bin_metric, position, n_neighbors=5)
    print('Consistency: ', cons_comp)

    result = (dataset_name, model_name, acc_test, disparate_impact_bin,
              mean_difference, classif_disparete_impact, avg_odds,
              equal_opport, false_discovery_rate, entropy_index, cons_comp)

    return result