class DoCalculusRelationships:
    """Compute interventional (do-calculus) distributions over the COMPAS data.

    Loads the COMPAS dataset restricted to a subgroup comparison and wraps the
    joined features/labels in a RelationshipsFinder so that conditional and
    marginal distributions can be queried.
    """

    def __init__(self):
        dataset = Dataset('config_compas.json')
        x, y = dataset.get_data(readable=True)
        # Restrict to African-American vs Caucasian rows.
        # Other options seen in this project: "Af_vs_all", "all".
        r = "Af_vs_Caucasian"
        x, y = get_dataframe(x, y, requested=r)
        self.finder = RelationshipsFinder(pd.concat([x, y], axis=1))

    def get_do_distributions(self):
        """Return (P(is_recid | race, sex), P(is_recid | do(race)), P(is_recid | do(sex))).

        Uses the back-door adjustment P(y | do(a)) = sum_c P(y | a, c) * P(c),
        where the adjustment variable c is the *other* sensitive attribute.
        """
        def adjust_race(row):
            # Weight the conditional probability by the marginal P(race).
            return row['probability'] * marginal_race[marginal_race['race'] == row['race']]['probability'].item()

        def adjust_sex(row):
            # Weight the conditional probability by the marginal P(sex).
            return row['probability'] * marginal_sex[marginal_sex['sex'] == row['sex']]['probability'].item()

        do_conditional = self.finder.get_conditional_distribution(['is_recid'], ['race', 'sex'])
        marginal_race = self.finder.get_marginal_distribution(['race'])
        marginal_sex = self.finder.get_marginal_distribution(['sex'])

        # P(is_recid | do(sex)): marginalize race out, weighted by P(race).
        do_sex = do_conditional.copy()
        do_sex['probability'] = do_sex.apply(adjust_race, axis=1)
        # Fix: aggregate only 'probability'. A bare .sum() would also try to
        # aggregate the leftover string-valued 'race' column (string
        # concatenation or a TypeError, depending on the pandas version).
        do_sex = do_sex.groupby(['is_recid', 'sex'])['probability'].sum().reset_index()

        # P(is_recid | do(race)): marginalize sex out, weighted by P(sex).
        do_race = do_conditional.copy()
        do_race['probability'] = do_race.apply(adjust_sex, axis=1)
        do_race = do_race.groupby(['is_recid', 'race'])['probability'].sum().reset_index()

        return do_conditional, do_race, do_sex
 def __init__(self):
     """Load the COMPAS data for the requested subgroup comparison and expose
     it through a RelationshipsFinder over the joined features/labels."""
     compas = Dataset('config_compas.json')
     features, labels = compas.get_data(readable=True)
     # Alternatives used elsewhere in this project: "Af_vs_all", "all".
     requested = "Af_vs_Caucasian"
     features, labels = get_dataframe(features, labels, requested=requested)
     self.finder = RelationshipsFinder(pd.concat([features, labels], axis=1))
# Example #3
def generate_independencies(x, y, tolerances=None):
    """Probe (in)dependence relationships in the joined data across a sweep
    of tolerance thresholds, printing the findings for each.

    Parameters
    ----------
    x, y :
        Feature and label frames; concatenated column-wise before analysis.
    tolerances : iterable of float, optional
        Thresholds to sweep. Defaults to 0.005 .. 0.1 in steps of 0.005
        (the original hard-coded sweep), so existing callers are unaffected.
        Uses a None sentinel rather than a mutable default list.
    """
    finder = RelationshipsFinder(pd.concat([x, y], axis=1))

    if tolerances is None:
        # Default sweep: 0.005, 0.010, ..., 0.100.
        tolerances = [
            0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05,
            0.055, 0.06, 0.065, 0.07, 0.075, 0.08, 0.085, 0.09, 0.095, 0.1
        ]
    for tolerance in tolerances:
        print("Tolerance: ", tolerance)
        finder.find_relationships_difference(tolerance)
        print("\n")
# Example #4
 def demographic_parity(x: pd.DataFrame, y_hat: pd.DataFrame, sensitive_attributes: list):
     """Return P(y_hat | sensitive attributes): the distribution of the
     predicted outcome conditioned on each combination of the sensitive
     attributes, computed over the joined features and predictions."""
     joined = pd.concat([x, y_hat], axis=1)
     return RelationshipsFinder(joined).get_conditional_distribution(['y_hat'], sensitive_attributes)
# Example #5
 def equalized_odds(x, y, y_hat, sensitive_attributes):
     """Return P(y_hat | sensitive attributes, y): the predicted-outcome
     distribution conditioned on the sensitive attributes together with the
     true label, computed over the joined frames."""
     joined = pd.concat([x, y, y_hat], axis=1)
     return RelationshipsFinder(joined).get_conditional_distribution(['y_hat'], sensitive_attributes + ['y'])
# Example #6
        # NOTE(review): fragment of a training/evaluation method — the
        # enclosing `def` is outside this view. Assumes self.train_data /
        # self.test_data are DataFrames with sensitive attributes a_1, a_2,
        # features v_1..v_6 and label y — confirm against the class definition.
        train_x = self.train_data[[
            'a_1', 'a_2', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6'
        ]]
        train_y = self.train_data[['y']]
        test_x = self.test_data[[
            'a_1', 'a_2', 'v_1', 'v_2', 'v_3', 'v_4', 'v_5', 'v_6'
        ]]
        test_y = self.test_data[['y']]
        # Fit on the training split, then score on the held-out split.
        self.classifier.fit(train_x, train_y)
        y_hat = self.classifier.predict(test_x)
        accuracy = accuracy_score(test_y, y_hat)  # computed but not used below — presumably for debugging
        # predict() returns an array; wrap and rename so metric code can join on 'y_hat'.
        y_hat = pd.DataFrame(y_hat).rename({0: 'y_hat'}, axis=1)
        # Demographic parity of the predictions w.r.t. sensitive attribute a_1.
        dp = Metrics.demographic_parity(test_x, y_hat, ['a_1'])
        print('ok')


if __name__ == '__main__':
    # Draw a synthetic dataset from the structural causal model and persist
    # it so SigmaFair can load it back from disk.
    sample = SCM.generate_dataset()
    sample.to_csv('sample_dataset.csv', index=False)

    sigma_fair = SigmaFair('sample_dataset.csv')
    relationships = RelationshipsFinder(sigma_fair.data)

    # Train the baseline classifier and the fairness-constrained variants.
    sigma_fair.train_classifier()
    sigma_fair.train_fair_classifiers()
    print('ok')