def __createUnfairRanking(self):
    """
    builds a ranking of 1000 candidates with strictly descending scores

    the first 500 entries (scores 1000 down to 501) carry no protected
    attribute, the last 500 entries (scores 500 down to 1) are tagged "Female"

    @return: list of Candidate objects in the order described above
    """
    upperHalf = [Candidate(score, []) for score in range(1000, 500, -1)]
    lowerHalf = [Candidate(score, ["Female"]) for score in range(500, 0, -1)]
    return upperHalf + lowerHalf
def create(self):
    """
    creates the actual candidate objects such that the data set has the same
    distribution as given in the SAT table

    reads self._scores, self._number_protected and self._number_nonprotected,
    which are indexed in parallel: entry i of the two count sequences says how
    many candidates with score self._scores[i] are created for each group

    @return: protectedCandidates:    list of candidates tagged "protected"
             nonProtectedCandidates: list of candidates without attributes
             both sorted by descending qualification
    """
    print("creating SAT candidate set")

    protectedCandidates = []
    nonProtectedCandidates = []

    for index, score in enumerate(self._scores):
        # one progress dot per distinct score
        print('.', end='', flush=True)

        # create protected candidates of given score
        protectedCandidates.extend(
            Candidate(score, ["protected"])
            for _ in range(self._number_protected[index]))

        # create non-protected candidates of given score
        nonProtectedCandidates.extend(
            Candidate(score, [])
            for _ in range(self._number_nonprotected[index]))

    # both groups are normalized together in one call; the return value is
    # not used, so this presumably rescales the candidates in place -- verify
    # against normalizeQualifications
    normalizeQualifications(protectedCandidates + nonProtectedCandidates)

    protectedCandidates.sort(key=lambda candidate: candidate.qualification, reverse=True)
    nonProtectedCandidates.sort(key=lambda candidate: candidate.qualification, reverse=True)

    print(" [Done]")
    return protectedCandidates, nonProtectedCandidates
def __createFairRanking(self):
    """
    builds a ranking of 1000 candidates with scores 1000 down to 1 in which
    the two groups alternate

    candidates with an odd score are tagged "Female", candidates with an even
    score carry no protected attribute; the ranking therefore starts with a
    candidate without attributes (score 1000)

    @return: list of Candidate objects ordered by descending score
    """
    return [
        Candidate(score, ["Female"] if score % 2 == 1 else [])
        for score in range(1000, 0, -1)
    ]
def testSelectionUnfairness(self):
    """
    checks selectionUtility for a one-element ranking against the first 500
    entries of self.__unfairRanking, which are used as the not-selected set

    first with a "female" candidate of qualification 1 (expected value -999),
    then with a "female" candidate of qualification 1001 (expected value 0)
    """
    notSelected = self.__unfairRanking[:500]

    # presumably every not-selected candidate is better qualified than this
    # one -- depends on how self.__unfairRanking was built; verify in the fixture
    ranking = [Candidate(1, ["female"])]
    self.assertEqual(-999, selectionUtility(ranking, notSelected))

    # replace the ranked candidate by one with qualification 1001
    ranking[0] = Candidate(1001, ["female"])
    self.assertEqual(
        0,
        selectionUtility(ranking, notSelected),
        "no quality inversion, should be therefore zero unfair")
def __readFileOfQuery(self, filename):
    """
    takes one .json file and reads all information, creates candidate objects from these
    information and sorts them into 3 arrays. One contains all protected candidates, one contains
    all non-protected candidates, one contains all candidates in the same order as they appear
    in the json-file

    the score of a candidate is (work months + education months) * hits, where hits is the
    second part of the 'memberSince_Hits' field (split at ' / ') or 1 if that field is missing

    @param filename: the json's filename

    @return:
        key: the search query string
        protected: array that contains all protected candidates, descending by qualification
        nonProtected: array that contains all nonProtected candidates, descending by qualification
        originalOrdering: array that contains all candidates in the order of the json-file
    """
    # the context manager closes the file even if parsing fails
    with open(filename) as currentfile:
        data = json.load(currentfile)

    xingSearchQuery = data['category']

    # if the Xing search query results in a gender neutral list,
    # we take female as the protected attribute
    protectedAttribute = 'm' if data['dominantSexXing'] == 'f' else 'f'

    protected = []
    nonProtected = []
    originalOrdering = []

    for r in data['profiles']:
        # determine Member since / Hits
        firstProfileEntry = r['profile'][0]
        if 'memberSince_Hits' in firstProfileEntry:
            hits = firstProfileEntry['memberSince_Hits'].split(' / ')[1]
        else:
            hits = 1

        work_experience = self.__determineWorkMonths(r)
        edu_experience = self.__determineEduMonths(r)
        score = (work_experience + edu_experience) * int(hits)

        # originalOrdering gets its own Candidate objects because it is
        # normalized in a separate call below
        if self.__determineIfProtected(r, protectedAttribute):
            protected.append(Candidate(score, [protectedAttribute]))
            originalOrdering.append(Candidate(score, [protectedAttribute]))
        else:
            nonProtected.append(Candidate(score, []))
            originalOrdering.append(Candidate(score, []))

    protected.sort(key=lambda candidate: candidate.qualification, reverse=True)
    nonProtected.sort(key=lambda candidate: candidate.qualification, reverse=True)

    normalizeQualifications(protected + nonProtected)
    normalizeQualifications(originalOrdering)

    return xingSearchQuery, protected, nonProtected, originalOrdering
def generateCandidateList(isProtected, numCandidates):
    """
    creates numCandidates candidates whose qualification equals their
    position in the list (0 .. numCandidates - 1)

    @param isProtected:   if truthy every candidate is tagged "female",
                          otherwise the candidates carry no attribute
    @param numCandidates: number of candidates to create

    @return: list of Candidate objects in ascending order of qualification
    """
    # the running number doubles as the qualification of the candidate,
    # every candidate gets its own attribute list
    if isProtected:
        return [Candidate(number, ["female"]) for number in range(numCandidates)]
    return [Candidate(number, []) for number in range(numCandidates)]
def testCreateFeldmanRanking(self):
    """
    creates 500 protected candidates (qualifications 1..500) and 2000
    non-protected candidates (qualifications 2, 4, .., 4000) into two arrays,
    ordered by descending qualifications

    the ranking of length k = 500 returned by feldmanRanking is then compared
    position by position, looking only at the protection status, with an
    expected ranking in which every fifth position (0, 5, 10, ..) holds a
    protected candidate and all other positions hold a non-protected one
    """
    k = 500

    protectedCandidates = [Candidate(i, ["female"]) for i in range(1, k + 1)]
    nonProtectedCandidates = [Candidate(2 * i, []) for i in range(1, k + 1501)]

    protectedCandidates.sort(key=lambda candidate: candidate.qualification, reverse=True)
    nonProtectedCandidates.sort(key=lambda candidate: candidate.qualification, reverse=True)

    # build a result that looks like the one produced in this case:
    # one protected candidate followed by four non-protected ones, each group
    # consumed best-first. The expectation is completed before the input
    # lists are handed to feldmanRanking.
    bestProtectedFirst = iter(protectedCandidates)
    bestNonProtectedFirst = iter(nonProtectedCandidates)
    expectedResult = [
        next(bestProtectedFirst) if position % 5 == 0 else next(bestNonProtectedFirst)
        for position in range(k)
    ]

    result = feldmanRanking(protectedCandidates, nonProtectedCandidates, k)[0]

    # cast kept from the earlier version of this test; the comparison below
    # only looks at the protection status
    for candidate in result:
        candidate.qualification = int(candidate.qualification)

    expectedProtectionStatus = [candidate.isProtected for candidate in expectedResult]
    actualProtectionStatus = [candidate.isProtected for candidate in result]

    self.assertEqual(expectedProtectionStatus, actualProtectionStatus)
def __separateGroups(self, protAttr, protAttrName):
    '''
    walks over all rows of self.__data and appends one candidate per row to
    either self.__protectedCandidates or self.__nonprotectedCandidates,
    afterwards both lists are sorted by descending qualification

    a row counts as protected if its 'prot_attr' value equals protAttr. The
    candidate's qualification is the row's 'score', the row's 'query_id' and
    'position' are handed to the candidate as stuffToSave

    @param protAttr:      value of column 'prot_attr' that marks a protected row
    @param protAttrName:  string, attribute name given to protected candidates
    '''
    def byQualification(candidate):
        return candidate.qualification

    for _, record in self.__data.iterrows():
        metadata = {
            "query_id" : record["query_id"],
            "position" : record["position"],
        }
        belongsToProtected = record['prot_attr'] == protAttr
        if belongsToProtected:
            targetList = self.__protectedCandidates
            attributes = [protAttrName]
        else:
            targetList = self.__nonprotectedCandidates
            attributes = []
        targetList.append(Candidate(record['score'], attributes, stuffToSave=metadata))

    self.__protectedCandidates.sort(key=byQualification, reverse=True)
    self.__nonprotectedCandidates.sort(key=byQualification, reverse=True)
def setUp(self):
    """
    test fixture: k = 200 protected candidates ("female", qualifications
    0 .. k-1) and k non-protected candidates (qualifications k .. 2k-1),
    stored in two separate arrays

    both arrays are ordered by descending qualifications
    """
    groupSize = 200
    self.__k = groupSize

    def byQualification(candidate):
        return candidate.qualification

    # per round the protected candidate is created first, then the
    # non-protected one
    pairs = [
        (Candidate(number, ["female"]), Candidate(number + groupSize, []))
        for number in range(groupSize)
    ]

    self.__protectedCandidates = sorted(
        (protected for protected, _ in pairs), key=byQualification, reverse=True)
    self.__nonProtectedCandidates = sorted(
        (nonProtected for _, nonProtected in pairs), key=byQualification, reverse=True)
def createRace(filename, *columnsToRead):
    """
    reads the given columns of a csv file and turns every row into a candidate

    currently working with recidivism score as qualification attribute in
    candidates: the qualification is 1 minus the value at tuple position 3.
    Rows with value 0 at tuple position 4 become non-protected candidates,
    all other rows become protected candidates tagged "black". Change the
    positions to try with other columns; position 0 of a row tuple is the
    data frame index.

    @param filename:       path of the csv file
    @param columnsToRead:  column names handed to pandas as usecols

    @return: protected, nonProtected -- two lists of candidates, each sorted
             by descending qualification
    """
    with open(filename) as csvfile:
        frame = pd.read_csv(csvfile, usecols=columnsToRead)

    def byQualification(candidate):
        return candidate.qualification

    protected, nonProtected = [], []
    for record in frame.itertuples():
        # change to different position in record[.] to access other columns from csv file
        if record[4] == 0:
            nonProtected.append(Candidate(1 - record[3], []))
        else:
            protected.append(Candidate(1 - record[3], ["black"]))

    # best qualification (i.e. lowest value in the score column) first
    protected.sort(key=byQualification, reverse=True)
    nonProtected.sort(key=byQualification, reverse=True)

    return protected, nonProtected
def create(filename, *columnsToRead, protectedAttribute):
    """
    reads the given columns of a csv file and turns every row into a candidate

    currently working with credit worthiness score as qualification attribute
    in candidates: the qualification is the value at tuple position 3. Rows
    with value 0 at tuple position 4 become non-protected candidates, all
    other rows become protected candidates. Change the positions to try with
    other columns; position 0 of a row tuple is the data frame index.

    dataset already normalized, therefore no normalization done

    @param filename:            path of the csv file
    @param columnsToRead:       column names handed to pandas as usecols
    @param protectedAttribute:  keyword-only, passed unchanged as the attribute
                                argument of every protected candidate
                                (presumably a list of attribute names --
                                verify against the callers)

    @return: protected, nonProtected -- two lists of candidates, each sorted
             by descending qualification
    """
    with open(filename) as csvfile:
        frame = pd.read_csv(csvfile, usecols=columnsToRead)

    def byQualification(candidate):
        return candidate.qualification

    protected, nonProtected = [], []
    for record in frame.itertuples():
        # change to different position in record[.] to access other columns from csv file
        if record[4] == 0:
            nonProtected.append(Candidate(record[3], []))
        else:
            # all protected candidates receive the very same object
            protected.append(Candidate(record[3], protectedAttribute))

    protected.sort(key=byQualification, reverse=True)
    nonProtected.sort(key=byQualification, reverse=True)

    return protected, nonProtected
def setUpClass(cls):
    """
    creates 20 protected ("female") and 20 non-protected candidates with
    qualifications 0 .. 19 and ranks them into 3 rankings:
    - one that contains only protected candidates
    - one that contains only non-protected
    - one of length 20 that alternates between a non-protected and a
      protected candidate, starting with a non-protected candidate at
      position 0

    the alternating ranking reuses the candidate objects of the other two
    rankings

    NOTE(review): an earlier description of this fixture said the alternating
    ranking starts with a protected candidate; the behaviour is left
    unchanged here -- confirm which start the dependent tests expect
    """
    # per round the protected candidate is created first, then the
    # non-protected one
    pairs = [
        (Candidate(number, ["female"]), Candidate(number, []))
        for number in range(20)
    ]

    cls.__unfairRankingOnlyProtected = [protected for protected, _ in pairs]
    cls.__unfairRankingOnlyNonProtected = [nonProtected for _, nonProtected in pairs]

    # a nonProtected candidate each even round, a protected each odd round
    cls.__fairRankingHalfHalf = [
        protected if position % 2 else nonProtected
        for position, (protected, nonProtected) in enumerate(pairs)
    ]
def testOrderingUnfairness(self):
    """
    checks the tuple returned by orderingUtility for three hand-made rankings:
    - two "female" candidates (6, 5) placed above a candidate without
      attributes that has qualification 7  -> expected (2, -2)
    - a ranking in strictly descending order of qualification -> expected (0, 0)
    - a ranking of ten candidates with several inversions -> expected (2, -4)
    """
    def plain(qualification):
        return Candidate(qualification, [])

    def female(qualification):
        return Candidate(qualification, ["female"])

    invertedTail = [
        plain(10), plain(9), plain(8),
        female(6), female(5),
        plain(7),
    ]
    self.assertEqual((2, -2), orderingUtility(invertedTail))

    strictlyDescending = [
        plain(10), plain(9), plain(8), plain(7),
        female(6), female(5),
    ]
    self.assertEqual((0, 0), orderingUtility(strictlyDescending))

    severalInversions = [
        plain(1000),
        female(996),
        plain(998), plain(997), plain(995), plain(994),
        female(990), female(989),
        plain(993), plain(993),
    ]
    self.assertEqual((2, -4), orderingUtility(severalInversions))