Ejemplo n.º 1
0
    def test_impact_ratio(self):
        """Check ``am.impact_ratio`` on four hand-built binary datasets.

        Every frame has eight rows with ``protected`` alternating 0/1, so
        each group has four members and only the ``target`` column varies.
        """
        # no discrimination: both groups contain the same share of positives
        data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 1, 0, 0],
                             'protected': [0, 1, 0, 1, 0, 1, 0, 1]})

        dataset = Dataset(data)
        self.assertEqual(1, am.impact_ratio(dataset, "target", "protected"))

        #===========================================================================

        # maximal discrimination: no row with protected == 1 has a positive
        # target, while every row with protected == 0 does
        data = pd.DataFrame({'target': [1, 0, 1, 0, 1, 0, 1, 0],
                             'protected': [0, 1, 0, 1, 0, 1, 0, 1]})

        dataset = Dataset(data)
        self.assertEqual(0, am.impact_ratio(dataset, "target", "protected"))

        #===========================================================================

        # bit of discrimination: 1/3 has no exact binary floating point
        # representation, so compare with a tolerance instead of strict
        # equality
        data = pd.DataFrame({'target': [1, 1, 0, 0, 1, 0, 1, 0],
                             'protected': [0, 1, 0, 1, 0, 1, 0, 1]})

        dataset = Dataset(data)
        self.assertAlmostEqual(
            1 / 3, am.impact_ratio(dataset, "target", "protected"))

        #===========================================================================

        # inverse discrimination: no row with protected == 0 has a positive
        # target; the expected result is infinity
        data = pd.DataFrame({'target': [0, 1, 0, 1, 0, 1, 0, 1],
                             'protected': [0, 1, 0, 1, 0, 1, 0, 1]})

        dataset = Dataset(data)
        self.assertEqual(np.inf, am.impact_ratio(dataset, "target", "protected"))
Ejemplo n.º 2
0
    def test_odds_ratio(self):
        """Exercise ``am.odds_ratio`` on four eight-row binary datasets.

        The ``protected`` column always alternates 0/1; only ``target``
        differs between the scenarios.
        """
        group_flags = [0, 1, 0, 1, 0, 1, 0, 1]

        def odds_ratio_for(target_values):
            # Wrap the given target column in a fresh Dataset and evaluate it.
            frame = pd.DataFrame({'target': target_values,
                                  'protected': group_flags})
            return am.odds_ratio(Dataset(frame), "target", "protected")

        # no discrimination
        self.assertEqual(1, odds_ratio_for([1, 1, 0, 0, 1, 1, 0, 0]))

        # no row with protected == 1 has a positive target; the expected
        # result is infinity
        self.assertEqual(np.inf, odds_ratio_for([1, 0, 1, 0, 1, 0, 1, 0]))

        # inverse discrimination
        self.assertEqual(0, odds_ratio_for([0, 1, 0, 1, 0, 1, 0, 1]))

        # bit of discrimination, value should be greater than one
        self.assertGreater(odds_ratio_for([1, 1, 0, 0, 1, 0, 1, 0]), 1)
Ejemplo n.º 3
0
    def test_normalized_difference(self):
        """Exercise ``am.normalized_difference`` on eight-row binary datasets.

        Three scenarios check a concrete value; two degenerate scenarios
        (constant ``target`` column) are expected to raise
        ``ZeroDivisionError``.
        """
        group_flags = [0, 1, 0, 1, 0, 1, 0, 1]

        def build_dataset(target_values):
            # Every scenario shares the alternating ``protected`` column.
            frame = pd.DataFrame({
                'target': target_values,
                'protected': group_flags,
            })
            return Dataset(frame)

        # no discrimination
        balanced = build_dataset([1, 1, 0, 0, 1, 1, 0, 0])
        self.assertEqual(
            0, am.normalized_difference(balanced, "target", "protected"))

        # maximal discrimination
        extreme = build_dataset([1, 0, 1, 0, 1, 0, 1, 0])
        self.assertEqual(
            1, am.normalized_difference(extreme, "target", "protected"))

        # bit of discrimination
        partial = build_dataset([1, 1, 0, 0, 1, 0, 1, 0])
        self.assertEqual(
            0.5, am.normalized_difference(partial, "target", "protected"))

        degenerate_targets = (
            [0, 0, 0, 0, 0, 0, 0, 0],  # no-one is selected
            [1, 1, 1, 1, 1, 1, 1, 1],  # everybody is selected
        )
        for target_values in degenerate_targets:
            # Build the dataset outside the context manager so that only the
            # metric call itself is checked for the zero division error.
            degenerate = build_dataset(target_values)
            with self.assertRaises(ZeroDivisionError):
                am.normalized_difference(degenerate, "target", "protected")
Ejemplo n.º 4
0
    def test_dataframeCreation(self):
        """Check that ``Dataset`` loads a valid CSV and rejects invalid ones."""
        loaded = Dataset(THIS_DIR + '/correctFile.csv')
        self.assertEqual((3, 4), loaded.data.shape,
                         "dataset has wrong dimensions")

        # Each of these fixtures is expected to make the constructor raise
        # ValueError.
        for bad_file in ('/incorrectFileNoProtected.csv',
                         '/incorrectFileNoTarget.csv'):
            with self.assertRaises(ValueError):
                Dataset(THIS_DIR + bad_file)
    def test_prob_positive_classification(self):
        """Check ``Dataset.prob_positive_classification`` on three targets.

        The scenarios cover a half-positive, an all-negative and an
        all-positive ``target`` column.
        """
        scenarios = (
            (0.5, [1, 1, 1, 1, 0, 0, 0, 0]),
            (0, [0, 0, 0, 0, 0, 0, 0, 0]),
            (1, [1, 1, 1, 1, 1, 1, 1, 1]),
        )

        for expected, target_values in scenarios:
            frame = pd.DataFrame({
                'target': target_values,
                'protected': [0, 1, 0, 1, 0, 1, 0, 1],
            })
            actual = Dataset(frame).prob_positive_classification("target")
            self.assertEqual(expected, actual)
Ejemplo n.º 6
0
    def test_mean_difference(self):
        """Check ``am.mean_difference`` for two choices of ``non_protected``.

        With targets 1..8 and groups 0..3 repeating, the group means are
        3, 4, 5 and 6. The expected frames hold the reference group's mean
        minus each other group's mean, indexed by the other groups.
        """
        frame = pd.DataFrame({
            'target': [1, 2, 3, 4, 5, 6, 7, 8],
            'protected': [0, 1, 2, 3, 0, 1, 2, 3],
        })
        dataset = Dataset(frame)

        # (reference group, expected differences, index of remaining groups)
        scenarios = (
            (0, [-1, -2, -3], [1, 2, 3]),
            (1, [1, -1, -2], [0, 2, 3]),
        )

        for reference_group, differences, other_groups in scenarios:
            expected = pd.DataFrame({'target': differences},
                                    index=other_groups,
                                    dtype=np.float64)
            actual = am.mean_difference(dataset,
                                        'target',
                                        'protected',
                                        non_protected=reference_group)
            self.assertTrue(expected.equals(actual))
Ejemplo n.º 7
0
    def test_normalize_column(self):
        """Check that ``Dataset.normalize_column`` rescales ``target1``.

        The values 1..9 are expected to become -0.5..0.5 in steps of 0.125
        inside ``dataset.data``, while the ``protected`` column must stay
        untouched.
        """

        data = pd.DataFrame({
            'target1': [1, 2, 3, 4, 5, 6, 7, 8, 9],
            'protected': [0, 1, 2, 3, 0, 1, 2, 3, 0]
        })
        dataset = Dataset(data)

        expected_result = pd.DataFrame({
            'target1':
            [-0.5, -0.375, -0.25, -0.125, 0, 0.125, 0.25, 0.375, 0.5],
            'protected': [0, 1, 2, 3, 0, 1, 2, 3, 0]
        })
        dataset.normalize_column('target1')
        # ``check_less_precise`` was deprecated in pandas 1.1 and removed in
        # pandas 2.0, where passing it raises TypeError. The default
        # comparison is already tolerance based, so the keyword is dropped.
        testing.assert_frame_equal(expected_result, dataset.data)
Ejemplo n.º 8
0
    def test_get_all_targets_of_group(self):
        """Check ``Dataset.get_all_targets_of_group`` for present and absent groups."""
        mixed = Dataset(pd.DataFrame({
            'target': [1, 1, 1, 1, 0, 0, 0, 0],
            'protected': [0, 1, 0, 1, 0, 1, 0, 1],
        }))

        # Both groups hold two positive and two negative targets; the order
        # of the returned values is not checked.
        expected = [1, 1, 0, 0]
        for group in (0, 1):
            actual = mixed.get_all_targets_of_group("target", "protected",
                                                    group)
            self.assertCountEqual(expected, actual)

        # If no member of the requested group is in the dataset, the result
        # is expected to be falsy (empty).
        single_group = Dataset(pd.DataFrame({
            'target': [1, 1, 1, 1, 0, 0, 0, 0],
            'protected': [1, 1, 1, 1, 1, 1, 1, 1],
        }))
        missing = single_group.get_all_targets_of_group(
            "target", "protected", 0)
        self.assertFalse(missing)
Ejemplo n.º 9
0
    def test_impact_ratio(self):
        """Check ``am.impact_ratio`` against four expected values.

        The ``protected`` column alternates 0/1 in every scenario; only the
        ``target`` column changes.
        """
        scenarios = (
            # no discrimination
            (1, [1, 1, 0, 0, 1, 1, 0, 0]),
            # maximal discrimination
            (0, [1, 0, 1, 0, 1, 0, 1, 0]),
            # discrimination against protected
            (0.5, [1, 1, 0, 0, 1, 0, 1, 0]),
            # inverse discrimination
            (2, [0, 1, 0, 1, 0, 1, 0, 1]),
        )

        for expected, target_values in scenarios:
            frame = pd.DataFrame({
                'target': target_values,
                'protected': [0, 1, 0, 1, 0, 1, 0, 1],
            })
            ratio = am.impact_ratio(Dataset(frame), "target", "protected")
            self.assertEqual(expected, ratio)
Ejemplo n.º 10
0
def main():
    """Parse the command line, then run the demo or load the given csv-file.

    Raises:
        RuntimeError: if the interpreter is older than Python 3.5.
        ValueError: if ``--demo`` is not set and no csv-file was passed via
            ``-f/--file``.
    """
    # check python version; compare (major, minor) as a tuple, because the
    # minor number alone says nothing without the major number
    if sys.version_info < (3, 5):
        raise RuntimeError("Please use Python 3.5 or above to run")

    # make numpy raise exceptions for floating point errors
    numpy.seterr(all='raise')

    # create the top-level parser
    parser = argparse.ArgumentParser(
        prog='fairness benchmarks',
        description=
        'performs various discrimination group_fairness_metrics on a given dataset',
        epilog="=== === === end === === ===")
    parser.add_argument("-f",
                        "--file",
                        nargs='*',
                        help="provide a dataset as csv-file to the algorithms")
    subparsers = parser.add_subparsers(help='sub-command help')

    # create the parser for the "file" command
    parser_file = subparsers.add_parser(
        'file', help='provide a csv file containing a dataset')
    parser_file.add_argument(dest='file_to_read')

    # run demo
    parser.add_argument("-d",
                        "--demo",
                        dest='demo',
                        action='store_true',
                        help="run all algorithms with an example dataset")
    args = parser.parse_args()

    if args.demo:
        run_demo('demo.csv')
        return

    # ``nargs='*'`` yields an empty list for a bare ``-f``, so check for
    # emptiness and not only for None before indexing
    if not args.file:
        raise ValueError("Please provide a csv-file")

    # read file into dataframe
    dataset = Dataset(args.file[0])
Ejemplo n.º 11
0
def run_demo(filename):
    """Load ``filename`` as a Dataset and print every measure to stdout.

    Each section header is printed before its measure is computed, so the
    output already shows the section if a measure raises.
    """
    intro = 'Running all measures with an example dataset and prints results to stdout. Please note, that this dataset was created artificially.'
    print(intro)
    dataset = Dataset(filename)

    print('=========== difference of means test =============')
    means_test = t_test_ind(dataset, 'target_score', 'protected_sex')
    print(means_test)

    print('\n=========== mean differences ==============')
    differences = mean_difference(dataset, 'target_score', 'protected_sex')
    print(differences.T)

    print('\n=========== normalized differences ============')
    normalized = normalized_difference(dataset, 'target_loan_approved',
                                       'protected_sex')
    print(normalized)

    print('\n=========== impact ratio ============')
    ratio = impact_ratio(dataset, 'target_loan_approved', 'protected_sex')
    print(ratio)

    print('\n=========== odds ratio ============')
    exact_test = fisher_exact(dataset, 'target_loan_approved',
                              'protected_sex')
    print(exact_test)
Ejemplo n.º 12
0
    def test_count_classification_and_category(self):
        """Check ``Dataset.count_classification_and_category`` for all four cells.

        In the fixture, the rows with protected == 0 hold three positive and
        one negative target, and the rows with protected == 1 hold two of
        each.
        """
        frame = pd.DataFrame({
            'target': [1, 1, 1, 1, 1, 0, 0, 0],
            'protected': [0, 1, 0, 1, 0, 1, 0, 1],
        })
        dataset = Dataset(frame)

        # (expected count, protected value, target value)
        scenarios = (
            (1, 0, 0),
            (3, 0, 1),
            (2, 1, 0),
            (2, 1, 1),
        )
        for expected, group, outcome in scenarios:
            count = dataset.count_classification_and_category(
                "target", "protected", group, outcome)
            self.assertEqual(expected, count)
Ejemplo n.º 13
0
    def test_conditional_prob_of_acceptance(self):
        """Check ``Dataset.conditional_prob_for_group_category`` on five targets.

        Every scenario asks for the category 1 and expects a dict keyed by
        the ``protected`` value (0 or 1).
        """
        scenarios = (
            # everybody accepted
            ({0: 1.0, 1: 1.0}, [1, 1, 1, 1, 1, 1, 1, 1]),
            # nobody accepted
            ({0: 0.0, 1: 0.0}, [0, 0, 0, 0, 0, 0, 0, 0]),
            # half of each group accepted
            ({0: 0.5, 1: 0.5}, [1, 1, 1, 1, 0, 0, 0, 0]),
            # only rows with protected == 0 accepted
            ({0: 1.0, 1: 0.0}, [1, 0, 1, 0, 1, 0, 1, 0]),
            # only rows with protected == 1 accepted
            ({0: 0.0, 1: 1.0}, [0, 1, 0, 1, 0, 1, 0, 1]),
        )

        for expected_result, target_values in scenarios:
            frame = pd.DataFrame({
                'target': target_values,
                'protected': [0, 1, 0, 1, 0, 1, 0, 1],
            })
            dataset = Dataset(frame)
            actual_result = dataset.conditional_prob_for_group_category(
                "target", "protected", 1)
            self.assertDictEqual(expected_result, actual_result)