def splitDataFrame(self, dataSet, sensitiveVal):
    """Build a DataSet holding only the training rows of one protected group.

    The file name, protected attribute, true labels, training headers and
    attribute count are copied from ``dataSet``. The new ``trainDataFrame``
    contains the rows of ``dataSet.trainDataFrame`` whose protected-attribute
    column equals ``sensitiveVal``.

    Args:
        dataSet: Source DataSet; only read, never modified.
        sensitiveVal: Value of the protected attribute that selects the group.

    Returns:
        The new DataSet, or the integer ``0`` when the group cannot be
        extracted (no row has ``sensitiveVal``, the protected column is
        missing, or the training frame / protected attribute is unset).
        Callers must check for ``0`` before using the result.
    """
    sensitiveAttr = dataSet.protectedAttribute
    df = dataSet.trainDataFrame

    ds = DataSet()
    ds.fileName = dataSet.fileName
    ds.protectedAttribute = sensitiveAttr
    ds.trueLabels = dataSet.trueLabels
    ds.trainHeaders = dataSet.trainHeaders
    ds.numAttributes = dataSet.numAttributes

    try:
        # Group by the scalar column name, not a one-element list: with a
        # length-1 list, newer pandas expects a 1-tuple key in get_group(),
        # which would make every scalar lookup fail.
        ds.trainDataFrame = df.groupby(sensitiveAttr).get_group(sensitiveVal)
    except (KeyError, TypeError, AttributeError):
        # KeyError: unknown column or no rows for sensitiveVal.
        # TypeError: protected attribute unset (None) or unhashable key.
        # AttributeError: trainDataFrame is not a DataFrame (e.g. None).
        # Anything else (including KeyboardInterrupt) now propagates instead
        # of being silently converted to 0.
        return 0
    return ds
def determineGroups(self, dataSet):
    """Split the test data into one DataSet per protected-attribute value.

    Args:
        dataSet: Source DataSet; its ``testDataFrame`` and metadata are read.

    Returns:
        A tuple ``(groupDataSets, groupValues)``. ``groupValues`` is the
        array of distinct values found in the protected-attribute column of
        the test frame, and ``groupDataSets`` is a list with one DataSet per
        value, in the same order.
    """
    testFrame = dataSet.testDataFrame
    attributeColumn = testFrame[dataSet.protectedAttribute]
    groupValues = attributeColumn.unique()

    def buildGroup(groupValue):
        # One fresh DataSet per group, sharing the parent's metadata.
        group = DataSet()
        group.fileName = dataSet.fileName
        # Keep only this group's rows and renumber them from 0 so that
        # later positional indexing works on the subset.
        # NOTE(review): a missing value (NaN) among groupValues compares
        # unequal to every row, so its group would be empty - confirm the
        # protected column never contains missing values.
        group.testDataFrame = testFrame[attributeColumn == groupValue].reset_index(drop=True)
        group.protectedAttribute = dataSet.protectedAttribute
        group.trueLabels = dataSet.trueLabels
        group.headers = dataSet.headers
        group.testHeaders = dataSet.testHeaders
        group.numAttributes = dataSet.numAttributes
        return group

    groupDataSets = [buildGroup(groupValue) for groupValue in groupValues]
    return groupDataSets, groupValues