import pandas as pd

# Dataset, get_dataframe and RelationshipsFinder are project-local helpers,
# assumed importable from the surrounding package.


class DoCalculusRelationships:

    def __init__(self):
        dataset = Dataset('config_compas.json')
        x, y = dataset.get_data(readable=True)
        # r = "Af_vs_all"
        r = "Af_vs_Caucasian"
        # r = "all"
        x, y = get_dataframe(x, y, requested=r)
        self.finder = RelationshipsFinder(pd.concat([x, y], axis=1))

    def get_do_distributions(self):
        # Backdoor adjustment:
        #   P(is_recid | do(sex))  = sum_race P(is_recid | race, sex) * P(race)
        #   P(is_recid | do(race)) = sum_sex  P(is_recid | race, sex) * P(sex)

        def adjust_race(row):
            return row['probability'] * \
                marginal_race[marginal_race['race'] == row['race']]['probability'].item()

        def adjust_sex(row):
            return row['probability'] * \
                marginal_sex[marginal_sex['sex'] == row['sex']]['probability'].item()

        do_conditional = self.finder.get_conditional_distribution(['is_recid'], ['race', 'sex'])
        marginal_race = self.finder.get_marginal_distribution(['race'])
        marginal_sex = self.finder.get_marginal_distribution(['sex'])

        do_sex = do_conditional.copy()
        do_sex['probability'] = do_sex.apply(adjust_race, axis=1)
        # Sum only the probability column so the non-numeric column being
        # marginalized out is dropped rather than summed.
        do_sex = do_sex.groupby(['is_recid', 'sex'])['probability'].sum().reset_index()

        do_race = do_conditional.copy()
        do_race['probability'] = do_race.apply(adjust_sex, axis=1)
        do_race = do_race.groupby(['is_recid', 'race'])['probability'].sum().reset_index()

        return do_conditional, do_race, do_sex
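# Hypothetical usage sketch (not part of the original code): compare the
# observational P(is_recid | race, sex) with the two interventional
# distributions computed above. Assumes config_compas.json and the
# project-local classes are available.
def demo_do_distributions():
    relationships = DoCalculusRelationships()
    do_conditional, do_race, do_sex = relationships.get_do_distributions()
    print(do_conditional)  # observational P(is_recid | race, sex)
    print(do_race)         # P(is_recid | do(race)), adjusted over sex
    print(do_sex)          # P(is_recid | do(sex)), adjusted over race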
def generate_independencies(x, y):
    finder = RelationshipsFinder(pd.concat([x, y], axis=1))
    tolerances = [
        0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05,
        0.055, 0.06, 0.065, 0.07, 0.075, 0.08, 0.085, 0.09, 0.095, 0.1
    ]
    for tolerance in tolerances:
        print("Tolerance: ", tolerance)
        finder.find_relationships_difference(tolerance)
        print("\n")
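# Illustrative sketch only: one way a difference-based independence check can
# work, testing |P(a, b) - P(a) * P(b)| <= tolerance for every pair of values.
# The actual logic lives in RelationshipsFinder.find_relationships_difference
# and may differ from this.
def is_independent(data: pd.DataFrame, a: str, b: str, tolerance: float) -> bool:
    joint = data.groupby([a, b]).size() / len(data)  # P(a, b)
    p_a = data[a].value_counts(normalize=True)       # P(a)
    p_b = data[b].value_counts(normalize=True)       # P(b)
    for (val_a, val_b), p_ab in joint.items():
        if abs(p_ab - p_a[val_a] * p_b[val_b]) > tolerance:
            return False
    return True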
def demographic_parity(x: pd.DataFrame, y_hat: pd.DataFrame, sensitive_attributes: list):
    data = pd.concat([x, y_hat], axis=1)
    finder = RelationshipsFinder(data)
    return finder.get_conditional_distribution(['y_hat'], sensitive_attributes)
def equalized_odds(x, y, y_hat, sensitive_attributes):
    data = pd.concat([x, y, y_hat], axis=1)
    finder = RelationshipsFinder(data)
    return finder.get_conditional_distribution(['y_hat'], sensitive_attributes + ['y'])
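# Toy illustration (hypothetical data, pure pandas): demographic parity holds
# when P(y_hat = 1 | a) is the same for every value of the sensitive attribute.
# demographic_parity above returns the analogous conditional distribution via
# RelationshipsFinder.
def demo_demographic_parity():
    data = pd.DataFrame({
        'a_1':   [0, 0, 0, 0, 1, 1, 1, 1],
        'y_hat': [1, 0, 1, 0, 1, 1, 0, 0],
    })
    rates = data.groupby('a_1')['y_hat'].mean()  # P(y_hat = 1 | a_1)
    print(rates)  # equal rates across groups -> demographic parity satisfied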
    def train_classifier(self):
        train_x = self.train_data[[
            'a_1', 'a_2', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6'
        ]]
        train_y = self.train_data[['y']]
        test_x = self.test_data[[
            'a_1', 'a_2', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6'
        ]]
        test_y = self.test_data[['y']]

        self.classifier.fit(train_x, train_y)
        y_hat = self.classifier.predict(test_x)
        accuracy = accuracy_score(test_y, y_hat)

        # Align the predictions with test_x's index so the concat inside
        # demographic_parity matches rows correctly.
        y_hat = pd.DataFrame(y_hat, columns=['y_hat'], index=test_x.index)
        dp = Metrics.demographic_parity(test_x, y_hat, ['a_1'])
        print('ok')


if __name__ == '__main__':
    dataset = SCM.generate_dataset()
    dataset.to_csv('sample_dataset.csv', index=False)
    # plt.hist(dataset['y'])
    # plt.show()
    # plt.hist(dataset[dataset.a_1==0]['y'])
    # plt.show()
    # plt.hist(dataset[dataset.a_1 == 1]['y'])
    # plt.show()
    fair = SigmaFair('sample_dataset.csv')
    finder = RelationshipsFinder(fair.data)
    fair.train_classifier()
    fair.train_fair_classifiers()
    print('ok')