def odds_diff(random_data, predicted_data, target_variable, protected_variable, unprivileged_input):
    """Compare a model's average absolute odds difference against a random baseline.

    Builds a random-prediction baseline dataset, wraps both the baseline and the
    model's predictions as AIF360 ``BinaryLabelDataset``s, then prints whether the
    average absolute odds difference suggests bias (threshold 0.2, arbitrary).

    Parameters:
        random_data (pd.DataFrame): Dataframe used as the (random-labelled) reference.
            NOTE: mutated in place — a 'Pred' column of coin-flip labels is added.
        predicted_data (pd.DataFrame): Dataframe containing the model's predictions.
        target_variable (str): Name of the label column.
        protected_variable (str): Name of the protected-attribute column.
        unprivileged_input: Value of the protected attribute marking the
            unprivileged group; every other observed value is treated as privileged.

    Returns:
        None. Results are printed.
    """
    # Size the random baseline to the data rather than a hard-coded 1000 rows
    # (the original assumed exactly 1000 rows and broke otherwise).
    random_data['Pred'] = np.random.binomial(1, .5, len(random_data))
    dataset = BinaryLabelDataset(df=random_data,
                                 label_names=[target_variable],
                                 protected_attribute_names=[protected_variable])
    classified_dataset = BinaryLabelDataset(df=predicted_data,
                                            label_names=[target_variable],
                                            protected_attribute_names=[protected_variable])
    # Every observed protected-attribute value other than the unprivileged one
    # is treated as privileged.
    unique_values = predicted_data[protected_variable].unique()
    privileged_group = [{protected_variable: v}
                        for v in unique_values[unique_values != unprivileged_input]]
    unprivileged_group = [{protected_variable: unprivileged_input}]  # female=0

    metric = ClassificationMetric(dataset, classified_dataset,
                                  unprivileged_group, privileged_group)
    # Compute once; the original called the metric twice.
    aaod = metric.average_abs_odds_difference()
    print(aaod)
    if abs(round(aaod, 3)) < 0.2:
        print('The algorithm can be considered to be not biased')
    else:
        print('There is a potential bias')
def calculate_bias_measures(data_orig_train, data_orig_vt, unprivileged_groups, privileged_groups):
    """Train a random forest on the training split and print fairness metrics.

    Fits a ``RandomForestClassifier`` (using AIF360 instance weights), predicts
    on the validation/test split, and prints five fairness measures for the
    given privileged/unprivileged group definitions.

    Parameters:
        data_orig_train: AIF360 dataset used for fitting.
        data_orig_vt: AIF360 dataset used for evaluation.
        unprivileged_groups (list[dict]): Unprivileged group definition(s).
        privileged_groups (list[dict]): Privileged group definition(s).

    Returns:
        None. Metrics are printed.
    """
    clf = RandomForestClassifier()
    clf.fit(data_orig_train.features,
            data_orig_train.labels.ravel(),
            sample_weight=data_orig_train.instance_weights)

    # Copy the evaluation dataset and overwrite its labels with predictions.
    eval_dataset = data_orig_vt
    predicted_dataset = eval_dataset.copy()
    predicted_dataset.labels = clf.predict(data_orig_vt.features)

    classified_metric_race = ClassificationMetric(
        eval_dataset,
        predicted_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    metric_pred_race = BinaryLabelDatasetMetric(
        predicted_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    print("Mean difference {}".format(metric_pred_race.mean_difference()))
    print("Disparate Metric {}".format(metric_pred_race.disparate_impact()))
    print("Equal Opportunity Difference {}".format(
        classified_metric_race.equal_opportunity_difference()))
    print("Average Abs Odds Difference {}".format(
        classified_metric_race.average_abs_odds_difference()))
    print("Theil index {}".format(classified_metric_race.theil_index()))
def fair_metrics(dataset, pred, pred_is_dataset=False):
    """Compute a table of fairness metrics, one row per protected attribute.

    For each protected attribute of the prediction dataset, builds AIF360
    classification/dataset metrics and records five fairness measures. The
    first row ('objective') holds the ideal value of each metric.

    Parameters:
        dataset: AIF360 ``BinaryLabelDataset`` with the true labels.
        pred: Either a label array, or (if ``pred_is_dataset``) an AIF360
            dataset already carrying predicted labels.
        pred_is_dataset (bool): True when ``pred`` is itself a dataset.

    Returns:
        pd.DataFrame: Metrics indexed by 'objective' plus one row per
        protected attribute; +/-inf values are clipped to 2.
    """
    if pred_is_dataset:
        dataset_pred = pred
    else:
        dataset_pred = dataset.copy()
        dataset_pred.labels = pred

    cols = [
        'statistical_parity_difference', 'equal_opportunity_difference',
        'average_abs_odds_difference', 'disparate_impact', 'theil_index'
    ]
    # Ideal values: 0 for the difference metrics, 1 for disparate impact.
    obj_fairness = [[0, 0, 0, 1, 0]]
    rows = [pd.DataFrame(data=obj_fairness, index=['objective'], columns=cols)]

    for attr in dataset_pred.protected_attribute_names:
        idx = dataset_pred.protected_attribute_names.index(attr)
        privileged_groups = [{
            attr: dataset_pred.privileged_protected_attributes[idx][0]
        }]
        unprivileged_groups = [{
            attr: dataset_pred.unprivileged_protected_attributes[idx][0]
        }]
        classified_metric = ClassificationMetric(
            dataset,
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)
        metric_pred = BinaryLabelDatasetMetric(
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)
        # mean_difference() is AIF360's statistical parity difference.
        rows.append(pd.DataFrame([[
            metric_pred.mean_difference(),
            classified_metric.equal_opportunity_difference(),
            classified_metric.average_abs_odds_difference(),
            metric_pred.disparate_impact(),
            classified_metric.theil_index()
        ]], columns=cols, index=[attr]))

    # DataFrame.append was removed in pandas 2.0 — concatenate once instead.
    table = pd.concat(rows)
    # Cap infinities (e.g. disparate impact with a zero denominator) at 2.
    table = table.replace([-np.inf, np.inf], 2)
    return table
def test_eqodds():
    """Check that equalized-odds post-processing keeps accuracy and fairness acceptable.

    Fits ``EqOddsPostprocessing`` on the validation split, applies it to the
    test predictions, and asserts the accuracy drop vs. the plain logistic
    regression stays under 10% while the average absolute odds difference
    stays under 0.1. Relies on module-level ``val``, ``val_pred``, ``pred``,
    ``test`` and ``cm_lr``.
    """
    postproc = EqOddsPostprocessing(
        unprivileged_groups=[{'sex': 0}],
        privileged_groups=[{'sex': 1}],
        seed=1234567)
    postproc.fit(val, val_pred)
    adjusted_pred = postproc.predict(pred)

    adjusted_metric = ClassificationMetric(
        test,
        adjusted_pred,
        unprivileged_groups=[{'sex': 0}],
        privileged_groups=[{'sex': 1}])

    # accuracy drop should be less than 10% (arbitrary)
    relative_drop = (cm_lr.accuracy() - adjusted_metric.accuracy()) / cm_lr.accuracy()
    assert relative_drop < 0.1
    # approximately equal odds
    assert adjusted_metric.average_abs_odds_difference() < 0.1
def get_fair_metrics(dataset, pred, pred_is_dataset=False):
    """
    Measure fairness metrics.

    Parameters:
        dataset (aif360 BinaryLabelDataset): Dataset with the true labels.
        pred (array): Model predictions.
        pred_is_dataset, optional (bool): True if prediction is already part of
            the dataset, column name 'labels'.

    Returns:
        fair_metrics (pd.DataFrame): Fairness metrics indexed by 'objective'
        (ideal values) plus one row per protected attribute; +/-inf values
        are clipped to 2.
    """
    if pred_is_dataset:
        dataset_pred = pred
    else:
        dataset_pred = dataset.copy()
        dataset_pred.labels = pred

    cols = [
        'statistical_parity_difference', 'equal_opportunity_difference',
        'average_abs_odds_difference', 'disparate_impact', 'theil_index'
    ]
    # Ideal values: 0 for the difference metrics, 1 for disparate impact.
    obj_fairness = [[0, 0, 0, 1, 0]]
    rows = [pd.DataFrame(data=obj_fairness, index=['objective'], columns=cols)]

    for attr in dataset_pred.protected_attribute_names:
        idx = dataset_pred.protected_attribute_names.index(attr)
        privileged_groups = [{
            attr: dataset_pred.privileged_protected_attributes[idx][0]
        }]
        unprivileged_groups = [{
            attr: dataset_pred.unprivileged_protected_attributes[idx][0]
        }]
        classified_metric = ClassificationMetric(
            dataset,
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)
        metric_pred = BinaryLabelDatasetMetric(
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)
        # mean_difference() is AIF360's statistical parity difference.
        rows.append(pd.DataFrame([[
            metric_pred.mean_difference(),
            classified_metric.equal_opportunity_difference(),
            classified_metric.average_abs_odds_difference(),
            metric_pred.disparate_impact(),
            classified_metric.theil_index()
        ]], columns=cols, index=[attr]))

    # DataFrame.append was removed in pandas 2.0 — concatenate once instead.
    fair_metrics = pd.concat(rows)
    # Cap infinities (e.g. disparate impact with a zero denominator) at 2.
    fair_metrics = fair_metrics.replace([-np.inf, np.inf], 2)
    return fair_metrics
# ============================================================================= # FAIRNESS METRICS (FOR REAL THIS TIME) # ============================================================================= # TODO: (1) Redo the previous cell for gender bias and recompute the corresponding # fairness metrics # (2)collect these values in a table # (3) think about a way to visualize these values # Statistical Parity difference (SPD) spd_pre_race = fairness_metrics.statistical_parity_difference() # Disparate Impact Ratio dir_pre_race = fairness_metrics.disparate_impact() # Average Odds Difference and Average absolute odds difference aod_pre_race = fairness_metrics.average_odds_difference() aaod_pre_race = fairness_metrics.average_abs_odds_difference() # Equal Opportunity Difference aka true positive rate difference eod_pre_race = fairness_metrics.equal_opportunity_difference() # Generealized entropy index with various alpha's fairness_metrics.between_all_groups_generalized_entropy_index(alpha=2) ClassificationMetric(dataset=bld_true, classified_dataset=bld_pred, unprivileged_groups=None, privileged_groups=None).false_positive_rate() df_fm.head() # TO DELETE