예제 #1
0
    def splitDataFrame(self, dataSet, sensitiveVal):
        """Build a DataSet holding only the training rows of one group.

        A fresh ``DataSet`` receives the file name, protected attribute,
        true labels, training headers and attribute count of ``dataSet``.
        Its ``trainDataFrame`` is set to the rows of
        ``dataSet.trainDataFrame`` whose protected-attribute column equals
        ``sensitiveVal``.

        Args:
            dataSet: Source object. This method reads its ``fileName``,
                ``protectedAttribute``, ``trueLabels``, ``trainHeaders``,
                ``numAttributes`` and ``trainDataFrame`` attributes.
            sensitiveVal: Value of the protected attribute to select.

        Returns:
            The populated ``DataSet``, or the integer ``0`` when the lookup
            raises ``KeyError`` (the protected-attribute column or the
            requested group is not present in the training frame).
        """
        sensitiveAttr = dataSet.protectedAttribute
        df = dataSet.trainDataFrame
        ds = DataSet()
        ds.fileName = dataSet.fileName
        ds.protectedAttribute = sensitiveAttr
        ds.trueLabels = dataSet.trueLabels
        ds.trainHeaders = dataSet.trainHeaders
        ds.numAttributes = dataSet.numAttributes

        try:
            # Group by the key itself rather than a one-element list: newer
            # pandas releases expect a tuple name in get_group() when the
            # grouper is a length-1 list, which would make every lookup fail.
            ds.trainDataFrame = df.groupby(sensitiveAttr).get_group(
                sensitiveVal)
        except KeyError:
            # Callers rely on the 0 sentinel for "no such group"; only the
            # lookup failure is translated, other errors propagate.
            return 0
        return ds
예제 #2
0
    def determineGroups(self, dataSet):
        """Split the test frame into one DataSet per protected-attribute value.

        For each distinct value found in the protected-attribute column of
        ``dataSet.testDataFrame``, a new ``DataSet`` is created that carries
        over the file name, protected attribute, true labels, headers, test
        headers and attribute count of ``dataSet``. Its ``testDataFrame``
        holds only the rows matching that value, re-indexed from zero.

        Args:
            dataSet: Source object. This method reads its ``testDataFrame``,
                ``protectedAttribute``, ``fileName``, ``trueLabels``,
                ``headers``, ``testHeaders`` and ``numAttributes``.

        Returns:
            A ``(groups, values)`` pair: the list of per-group ``DataSet``
            objects and the result of ``unique()`` on the protected column,
            in matching order.
        """
        protectedAttr = dataSet.protectedAttribute
        testFrame = dataSet.testDataFrame
        groupValues = testFrame[protectedAttr].unique()

        groupedDataSets = []
        for groupValue in groupValues:
            # Rows are selected by equality with the group value.
            rowMask = testFrame[protectedAttr] == groupValue

            groupDataSet = DataSet()
            groupDataSet.fileName = dataSet.fileName
            # Drop the old index so later positional lookups start at 0.
            groupDataSet.testDataFrame = testFrame[rowMask].reset_index(
                drop=True)
            groupDataSet.protectedAttribute = protectedAttr
            groupDataSet.trueLabels = dataSet.trueLabels
            groupDataSet.headers = dataSet.headers
            groupDataSet.testHeaders = dataSet.testHeaders
            groupDataSet.numAttributes = dataSet.numAttributes

            groupedDataSets.append(groupDataSet)

        return groupedDataSets, groupValues