def test_impact_ratio(self):
    """impact_ratio: acceptance-rate ratio of protected vs. non-protected.

    1 means parity, 0 means no protected member is accepted. When the
    non-protected acceptance rate is zero the division by zero yields inf.
    """
    # no discrimination: both groups have the same acceptance rate
    data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 1, 0, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(1, am.impact_ratio(dataset, "target", "protected"))
    #===========================================================================
    # maximal discrimination: no protected group member is accepted
    data = pd.DataFrame({'target': [1, 0, 1, 0, 1, 0, 1, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(0, am.impact_ratio(dataset, "target", "protected"))
    #===========================================================================
    # bit of discrimination: protected rate 0.25 vs non-protected rate 0.75
    data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 0, 1, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(1 / 3, am.impact_ratio(dataset, "target", "protected"))
    #===========================================================================
    # inverse discrimination: the non-protected acceptance rate is zero, so the
    # division by zero yields inf (the old comment wrongly said "nan" -- the
    # assertion has always checked np.inf)
    data = pd.DataFrame({'target': [0, 1, 0, 1, 0, 1, 0, 1],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(np.inf, am.impact_ratio(dataset, "target", "protected"))
def test_odds_ratio(self):
    """odds_ratio: 1 means parity; values greater than one indicate
    discrimination against the protected group."""
    # no discrimination
    data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 1, 0, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(1, am.odds_ratio(dataset, "target", "protected"))
    #===========================================================================
    # the probability of being accepted as a protected group member is in this
    # case zero; the resulting division by zero yields inf (the old comment
    # wrongly said "nan" -- the assertion has always checked np.inf)
    data = pd.DataFrame({'target': [1, 0, 1, 0, 1, 0, 1, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(np.inf, am.odds_ratio(dataset, "target", "protected"))
    #===========================================================================
    # inverse discrimination
    data = pd.DataFrame({'target': [0, 1, 0, 1, 0, 1, 0, 1],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertEqual(0, am.odds_ratio(dataset, "target", "protected"))
    #===========================================================================
    # bit of discrimination, value should be greater than one
    data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 0, 1, 0],
                         'protected': [0, 1, 0, 1, 0, 1, 0, 1]})
    dataset = Dataset(data)
    self.assertGreater(am.odds_ratio(dataset, "target", "protected"), 1)
def test_normalized_difference(self):
    """normalized_difference: 0 for parity, 1 for maximal discrimination;
    raises ZeroDivisionError when nobody or everybody is selected."""
    protected = [0, 1, 0, 1, 0, 1, 0, 1]

    # no discrimination
    ds = Dataset(pd.DataFrame({'target': [1, 1, 0, 0, 1, 1, 0, 0],
                               'protected': protected}))
    self.assertEqual(0, am.normalized_difference(ds, "target", "protected"))

    # maximal discrimination
    ds = Dataset(pd.DataFrame({'target': [1, 0, 1, 0, 1, 0, 1, 0],
                               'protected': protected}))
    self.assertEqual(1, am.normalized_difference(ds, "target", "protected"))

    # bit of discrimination
    ds = Dataset(pd.DataFrame({'target': [1, 1, 0, 0, 1, 0, 1, 0],
                               'protected': protected}))
    self.assertEqual(0.5, am.normalized_difference(ds, "target", "protected"))

    # if no-one is selected the function would raise ZeroDivisionError
    ds = Dataset(pd.DataFrame({'target': [0] * 8, 'protected': protected}))
    self.assertRaises(ZeroDivisionError, am.normalized_difference,
                      ds, "target", "protected")

    # if everybody is selected the function would raise ZeroDivisionError
    ds = Dataset(pd.DataFrame({'target': [1] * 8, 'protected': protected}))
    self.assertRaises(ZeroDivisionError, am.normalized_difference,
                      ds, "target", "protected")
def test_dataframeCreation(self):
    """Dataset construction from csv: shape of a valid file, ValueError for
    files missing the protected or the target column."""
    ds = Dataset(THIS_DIR + '/correctFile.csv')
    self.assertEqual((3, 4), ds.data.shape, "dataset has wrong dimensions")

    for bad_name in ('/incorrectFileNoProtected.csv',
                     '/incorrectFileNoTarget.csv'):
        with self.assertRaises(ValueError):
            Dataset(THIS_DIR + bad_name)
def test_prob_positive_classification(self):
    """prob_positive_classification: fraction of rows with a positive target."""
    protected = [0, 1, 0, 1, 0, 1, 0, 1]
    # (target column, expected probability), asserted in the original order
    cases = [
        ([1, 1, 1, 1, 0, 0, 0, 0], 0.5),  # half positive
        ([0, 0, 0, 0, 0, 0, 0, 0], 0),    # none positive
        ([1, 1, 1, 1, 1, 1, 1, 1], 1),    # all positive
    ]
    for targets, expected in cases:
        ds = Dataset(pd.DataFrame({'target': targets,
                                   'protected': protected}))
        actual = ds.prob_positive_classification("target")
        self.assertEqual(expected, actual)
def test_mean_difference(self):
    """mean_difference: per-group target-mean gap relative to the chosen
    non-protected category, indexed by the remaining category values."""
    data = pd.DataFrame({
        'target': [1, 2, 3, 4, 5, 6, 7, 8],
        'protected': [0, 1, 2, 3, 0, 1, 2, 3]
    })
    dataset = Dataset(data)

    # group means are 3, 4, 5, 6 for categories 0..3;
    # with non_protected=0 the gaps to groups 1..3 are -1, -2, -3
    expected_result_0 = pd.DataFrame({'target': [-1, -2, -3]},
                                     index=[1, 2, 3], dtype=np.float64)
    actual_result_0 = am.mean_difference(dataset, 'target', 'protected',
                                         non_protected=0)
    # assert_frame_equal reports the mismatching cells on failure, unlike
    # assertTrue(df.equals(...)) which only says "False is not true"; this
    # also matches how the other frame comparisons in this file are written
    testing.assert_frame_equal(expected_result_0, actual_result_0)
    #==========================================================================
    # with non_protected=1 the gaps to groups 0, 2, 3 are 1, -1, -2
    expected_result_1 = pd.DataFrame({'target': [1, -1, -2]},
                                     index=[0, 2, 3], dtype=np.float64)
    actual_result_1 = am.mean_difference(dataset, 'target', 'protected',
                                         non_protected=1)
    testing.assert_frame_equal(expected_result_1, actual_result_1)
def test_normalize_column(self):
    """normalize_column rescales the named column in place; other columns
    are left untouched."""
    data = pd.DataFrame({
        'target1': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        'protected': [0, 1, 2, 3, 0, 1, 2, 3, 0]
    })
    dataset = Dataset(data)
    expected_result = pd.DataFrame({
        'target1': [-0.5, -0.375, -0.25, -0.125, 0, 0.125, 0.25, 0.375, 0.5],
        'protected': [0, 1, 2, 3, 0, 1, 2, 3, 0]
    })
    dataset.normalize_column('target1')
    # check_less_precise=True was deprecated in pandas 1.1 and removed in
    # pandas 2.0; rtol/atol of 0.5e-3 is its documented equivalent
    testing.assert_frame_equal(expected_result, dataset.data,
                               check_exact=False, rtol=0.5e-3, atol=0.5e-3)
def test_get_all_targets_of_group(self):
    """get_all_targets_of_group: target values of one protected category."""
    df = pd.DataFrame({
        'target': [1, 1, 1, 1, 0, 0, 0, 0],
        'protected': [0, 1, 0, 1, 0, 1, 0, 1]
    })
    ds = Dataset(df)
    # each category holds two accepted and two rejected members
    expected = [1, 1, 0, 0]
    for category in (0, 1):
        actual = ds.get_all_targets_of_group("target", "protected", category)
        self.assertCountEqual(expected, actual)
    #=============================================================================
    # if no-one of the desired group is in the dataset, should return an
    # empty (falsy) result
    df = pd.DataFrame({
        'target': [1, 1, 1, 1, 0, 0, 0, 0],
        'protected': [1, 1, 1, 1, 1, 1, 1, 1]
    })
    ds = Dataset(df)
    self.assertFalse(ds.get_all_targets_of_group("target", "protected", 0))
def test_impact_ratio(self):
    """impact_ratio expectations for parity, maximal, partial and inverse
    discrimination.

    NOTE(review): a sibling test of the same name asserts different values
    (1/3 and np.inf) for the last two scenarios -- these expectations belong
    to this version of impact_ratio and are preserved as-is.
    """
    protected = [0, 1, 0, 1, 0, 1, 0, 1]

    # no discrimination
    ds = Dataset(pd.DataFrame({'target': [1, 1, 0, 0, 1, 1, 0, 0],
                               'protected': protected}))
    self.assertEqual(1, am.impact_ratio(ds, "target", "protected"))

    # maximal discrimination
    ds = Dataset(pd.DataFrame({'target': [1, 0, 1, 0, 1, 0, 1, 0],
                               'protected': protected}))
    self.assertEqual(0, am.impact_ratio(ds, "target", "protected"))

    # discrimination against protected
    ds = Dataset(pd.DataFrame({'target': [1, 1, 0, 0, 1, 0, 1, 0],
                               'protected': protected}))
    self.assertEqual(0.5, am.impact_ratio(ds, "target", "protected"))

    # inverse discrimination
    ds = Dataset(pd.DataFrame({'target': [0, 1, 0, 1, 0, 1, 0, 1],
                               'protected': protected}))
    self.assertEqual(2, am.impact_ratio(ds, "target", "protected"))
def main():
    """Entry point: parse command line arguments and run the requested mode.

    Raises:
        Exception: when running on a Python interpreter older than 3.5.
        ValueError: when neither --demo nor a csv file is given.
    """
    # check python version -- compare the full version tuple; the previous
    # check `sys.version_info[1] < 3.5` looked only at the minor version and
    # would have let Python 2.7 through (7 < 3.5 is False)
    if sys.version_info < (3, 5):
        raise Exception("Please use Python 3.5 or above to run")

    # make numpy raise on floating-point errors instead of warning
    numpy.seterr(all='raise')

    # create the top-level parser
    parser = argparse.ArgumentParser(
        prog='fairness benchmarks',
        description=
        'performs various discrimination group_fairness_metrics on a given dataset',
        epilog="=== === === end === === ===")
    parser.add_argument("-f", "--file", nargs='*',
                        help="provide a dataset as csv-file to the algorithms")
    subparsers = parser.add_subparsers(help='sub-command help')

    # create the parser for the "file" command
    parser_file = subparsers.add_parser(
        'file', help='provide a csv file containing a dataset')
    parser_file.add_argument(dest='file_to_read')

    # run demo
    parser.add_argument("-d", "--demo", dest='demo', action='store_true',
                        help="run all algorithms with an example dataset")

    args = parser.parse_args()

    if args.demo:
        run_demo('demo.csv')
        return

    # read file into dataframe
    if args.file is None:
        raise ValueError("Please provide a csv-file")
    dataset = Dataset(args.file[0])
def run_demo(filename):
    """Run every implemented fairness measure against the example dataset in
    *filename* and print each result to stdout."""
    print(
        'Running all measures with an example dataset and prints results to stdout. Please note, that this dataset was created artificially.'
    )
    ds = Dataset(filename)

    print('=========== difference of means test =============')
    print(t_test_ind(ds, 'target_score', 'protected_sex'))

    print('\n=========== mean differences ==============')
    print(mean_difference(ds, 'target_score', 'protected_sex').T)

    print('\n=========== normalized differences ============')
    print(normalized_difference(ds, 'target_loan_approved', 'protected_sex'))

    print('\n=========== impact ratio ============')
    print(impact_ratio(ds, 'target_loan_approved', 'protected_sex'))

    print('\n=========== odds ratio ============')
    print(fisher_exact(ds, 'target_loan_approved', 'protected_sex'))
def test_count_classification_and_category(self):
    """count_classification_and_category: number of rows matching the given
    pair of values.

    NOTE(review): the expected counts only line up if the last two arguments
    are not (target value, protected value) in that order -- the original
    call/expectation pairs are preserved exactly; verify the signature in
    Dataset before relabeling them.
    """
    ds = Dataset(pd.DataFrame({
        'target': [1, 1, 1, 1, 1, 0, 0, 0],
        'protected': [0, 1, 0, 1, 0, 1, 0, 1]
    }))
    # (third arg, fourth arg) -> expected count, as asserted originally
    expectations = [
        ((0, 0), 1),
        ((0, 1), 3),
        ((1, 0), 2),
        ((1, 1), 2),
    ]
    for (val_a, val_b), expected in expectations:
        self.assertEqual(
            expected,
            ds.count_classification_and_category("target", "protected",
                                                 val_a, val_b))
def test_conditional_prob_of_acceptance(self):
    """conditional_prob_for_group_category: probability of target == 1 per
    protected category, returned as a {category: probability} dict."""
    protected = [0, 1, 0, 1, 0, 1, 0, 1]
    # (target column, expected dict), asserted in the original order
    cases = [
        ([1, 1, 1, 1, 1, 1, 1, 1], {0: 1.0, 1: 1.0}),  # everyone accepted
        ([0, 0, 0, 0, 0, 0, 0, 0], {0: 0.0, 1: 0.0}),  # no-one accepted
        ([1, 1, 1, 1, 0, 0, 0, 0], {0: 0.5, 1: 0.5}),  # equal acceptance
        ([1, 0, 1, 0, 1, 0, 1, 0], {0: 1.0, 1: 0.0}),  # only category 0
        ([0, 1, 0, 1, 0, 1, 0, 1], {0: 0.0, 1: 1.0}),  # only category 1
    ]
    for targets, expected_result in cases:
        ds = Dataset(pd.DataFrame({'target': targets,
                                   'protected': protected}))
        actual_result = ds.conditional_prob_for_group_category(
            "target", "protected", 1)
        self.assertDictEqual(expected_result, actual_result)