Esempio n. 1
0
    def __init__(self,
                 X,
                 y=None,
                 actionset=None,
                 optimizer=_SOLVER_TYPE_CPX,
                 decision_threshold=None,
                 **clf_args):
        """
        Set up an audit of a classifier.

        :param X: feature matrix of the points to audit; also used to build a
            default ActionSet when `actionset` is not given.
        :param y: accepted for interface compatibility; not used here.
        :param actionset: ActionSet to audit with. When None, a default one is
            built from X and aligned to the classifier coefficients.
        :param optimizer: solver identifier, stored on the instance.
        :param decision_threshold: score cutoff, stored on the instance.
        :param clf_args: classifier description (`clf`, or `coefficients` with
            an optional `intercept`), forwarded to `__parse_clf_args`, which is
            expected to populate `self.coefficients`.
        :raises ValueError: if neither an actionset nor X is provided.
        """
        ## set clf and coefficients
        self.__parse_clf_args(clf_args)

        ### actionset
        self.actionset = actionset
        self.X = X

        # Compare against None explicitly: truth-testing a multi-element array
        # or a DataFrame raises "truth value is ambiguous".
        if self.actionset is None:
            warnings.warn(
                "No actionset provided, instantiating with defaults: all features mutable, all features percentile."
            )
            if self.X is None:
                # a bare string is not an exception; raise a real one
                raise ValueError("No actionset or X provided.")
            self.actionset = ActionSet(X=self.X)
            self.actionset.align(self.coefficients)

        self.optimizer = optimizer
        self.decision_threshold = decision_threshold
Esempio n. 2
0
def my_actionset_fake():
    """Return an ActionSet built from a tiny hard-coded dataset.

    The dataset has two rows and three columns named 'a', 'b' and 'c'.
    """
    column_names = ['a', 'b', 'c']
    toy_data = np.array([[1, 2, 3], [2, 3, 4]])
    return ActionSet(toy_data, names=column_names)
Esempio n. 3
0
def german_actionset_unaligned(german_X):
    """Build the ActionSet used for the German credit data.

    Six attributes are marked immutable and two features get a fixed step
    direction. No alignment call is made here.
    """
    actions = ActionSet(X=german_X)

    # attributes that may not be changed by any action
    actions[[
        'Age', 'Single', 'JobClassIsSkilled', 'ForeignWorker', 'OwnsHouse',
        'RentsHouse'
    ]].mutable = False

    # features restricted to move in a single direction
    directions = (
        ('CriticalAccountOrLoansElsewhere', -1),
        ('CheckingAccountBalance_geq_0', 1),
    )
    for feature, direction in directions:
        actions[feature].step_direction = direction

    return actions
Esempio n. 4
0
def action_set(request, data):
    """Return an ActionSet for data['X'].

    When request.param is 'immutable' and the dataset is 'german', six
    attributes are frozen and two features get a fixed step direction;
    otherwise the default ActionSet is returned unchanged.
    """
    actions = ActionSet(X=data['X'])

    wants_german_restrictions = (request.param == 'immutable'
                                 and data['data_name'] == 'german')
    if not wants_german_restrictions:
        return actions

    frozen = ['Age', 'Single', 'JobClassIsSkilled', 'ForeignWorker', 'OwnsHouse', 'RentsHouse']
    actions[frozen].mutable = False
    actions['CriticalAccountOrLoansElsewhere'].step_direction = -1
    actions['CheckingAccountBalance_geq_0'].step_direction = 1
    return actions
Esempio n. 5
0
def test_rb_onehot_encoding(data, solver):
    """Check recourse on a one-hot encoded categorical variable.

    The classifier has weights w = [3, -1, -1, ...] and intercept -1, so only
    the point with x[0] = 1 scores positive. For every other level j the
    optimal action must switch x[j] off while keeping the encoding valid
    (entries still sum to one) and making the score positive.
    """
    # only datasets with exactly one categorical variable are exercised
    if len(data['categorical_names']) != 1:
        return

    # restrict the data to the indicator columns
    onehot_names = data['onehot_names']
    n_levels = len(onehot_names)
    X_onehot = data['X'][onehot_names]
    assert np.all(X_onehot.sum(axis=1) == 1)

    # score(x[0] = 1) =  3 -> yhat = +1
    # score(x[j] = 1) = -2 -> yhat = -1 for j = 1,2,...,k
    weights = np.full(n_levels, -1.0)
    weights[0] = 3.0
    offset = -1.0

    # action set limited to at most one active indicator
    aset = ActionSet(X_onehot)
    aset.add_constraint('subset_limit', names=onehot_names, lb=0, ub=1)
    aset.set_alignment(coefficients=weights, intercept=offset)

    builder = RecourseBuilder(action_set=aset,
                              coefficients=weights,
                              intercept=offset,
                              solver=solver)

    for j in range(1, n_levels):
        # start from the point whose only active indicator is j
        point = np.zeros(n_levels)
        point[j] = 1.0
        assert builder.score(point) < 0

        builder.x = point
        actions = builder.fit()['actions']

        # the action must flip the prediction and keep a valid encoding
        moved = point + actions
        assert builder.score(moved) > 0
        assert np.isclose(actions[j], -1.0)
        assert np.isclose(np.sum(moved), 1.0)
Esempio n. 6
0
def as_result_file(name, extension='pdf', header=file_header):
    """Return the path '<name>.<extension>' inside demo_results_dir.

    `header` is accepted but not used when building the path.
    """
    file_name = '%s.%s' % (name, extension)
    return os.path.join(demo_results_dir, file_name)


## load the data, then split the outcome column from the features
german_df = pd.read_csv(data_file).reset_index(drop=True)
# german_df = german_df.assign(isMale=lambda df: (df['Gender']=='Male').astype(int))#.drop(['PurposeOfLoan', 'Gender', 'OtherLoansAtStore'], axis=1)
y = german_df['GoodCustomer']
X = german_df.drop(
    ['GoodCustomer', 'PurposeOfLoan', 'Gender', 'OtherLoansAtStore'], axis=1)

## build the action set on a gender-balanced resample
# each row is weighted by 1 / (size of its gender group)
gender_weight = (
    german_df.assign(c=1)
    .groupby('Gender')['c']
    .transform(lambda s: s * 1. / len(s))
)
X_gender_balanced = X.sample(n=3 * len(X),
                             replace=True,
                             weights=gender_weight)
action_set = ActionSet(X=X_gender_balanced)

# attributes that actions may not change
for _immutable_name in ('Age', 'Single', 'JobClassIsSkilled', 'ForeignWorker',
                        'OwnsHouse', 'RentsHouse'):
    action_set[_immutable_name].mutable = False

# features restricted to a single direction of change
action_set['CriticalAccountOrLoansElsewhere'].step_direction = -1
action_set['CheckingAccountBalance_geq_0'].step_direction = 1
# action_set['isMale'].mutable = False

## placeholder model (not fitted in this excerpt)
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
# grid = GridSearchCV(
#     clf, param_grid={'C': np.logspace(-4, 3)},
#     cv=10,
Esempio n. 7
0
class Auditor(object):
    """
    Audit a linear classifier by solving a recourse problem per denied point.

    The classifier is described either by a fitted model (``clf=...``) or by
    ``coefficients=...`` with an optional ``intercept=...``.
    """

    def __init__(self,
                 X,
                 y=None,
                 actionset=None,
                 optimizer=_SOLVER_TYPE_CPX,
                 decision_threshold=None,
                 **clf_args):
        """
        Set up an audit of a classifier.

        :param X: feature matrix of the points to audit; also used to build a
            default ActionSet when `actionset` is not given.
        :param y: accepted for interface compatibility; not used here.
        :param actionset: ActionSet to audit with. When None, a default one is
            built from X and aligned to the classifier coefficients.
        :param optimizer: solver identifier passed to each RecourseBuilder.
        :param decision_threshold: points whose positive-class probability is
            below this value are treated as denied.
        :param clf: (keyword) fitted model exposing `coef_` and `intercept_`.
        :param coefficients: (keyword) coefficient vector, used when no `clf`
            is given.
        :param intercept: (keyword) intercept paired with `coefficients`;
            defaults to 0.0.
        :raises ValueError: if neither an actionset nor X is provided.
        """
        ## set clf and coefficients
        self.__parse_clf_args(clf_args)

        ### actionset
        self.actionset = actionset
        self.X = X

        # Compare against None explicitly: truth-testing a multi-element array
        # or a DataFrame raises "truth value is ambiguous".
        if self.actionset is None:
            warnings.warn(
                "No actionset provided, instantiating with defaults: all features mutable, all features percentile."
            )
            if self.X is None:
                # a bare string is not an exception; raise a real one
                raise ValueError("No actionset or X provided.")
            self.actionset = ActionSet(X=self.X)
            self.actionset.align(self.coefficients)

        self.optimizer = optimizer
        self.decision_threshold = decision_threshold

    def __parse_clf_args(self, args):
        """
        Populate `self.clf`, `self.coefficients` and `self.intercept`.

        :param args: dict holding either `clf`, or `coefficients` with an
            optional `intercept`. `clf` takes precedence when both are given.
        """
        assert 'clf' in args or ('coefficients' in args), \
            "either `clf` or `coefficients` must be provided"

        if 'clf' in args:

            clf = args['clf']
            # keep the model: get_negative_points needs it for predict_proba
            self.clf = clf
            self.coefficients = np.array(clf.coef_).flatten()
            # intercept_ is typically a length-1 array; extract the scalar
            # explicitly because float() on a 1-d array is deprecated in NumPy
            self.intercept = float(np.asarray(clf.intercept_).item())

        elif 'coefficients' in args:
            self.clf = None
            self.coefficients = args['coefficients']
            self.intercept = args['intercept'] if 'intercept' in args else 0.0

    def get_negative_points(self):
        """
        Return positional indices of the rows of X scored below the threshold.

        :return: 1-d integer array of row positions whose positive-class
            probability (column 1 of `predict_proba`) is below
            `self.decision_threshold`.
        :raises ValueError: if the Auditor was built without a `clf`.
        """
        # NOTE(review): no default is applied when decision_threshold is None;
        # confirm callers always set it before auditing.
        if getattr(self, 'clf', None) is None:
            raise ValueError(
                "get_negative_points requires a classifier: pass `clf=...` "
                "when constructing the Auditor.")
        scores = self.clf.predict_proba(self.X)[:, 1]
        return np.where(scores < self.decision_threshold)[0]

    def audit(self, num_cases=None):
        """
        Solve a recourse problem for each denied point and collect its cost.

        :param num_cases: optional cap on the number of denied points to
            audit; when smaller than the number of denied points, a random
            subset of distinct points is drawn.
        :return: dict mapping the row position of each audited point to the
            `total_cost` (or, failing that, `max_cost`) reported by the solver.
        """
        ### TODO: bake decision threshold into the optimizer.

        denied_individuals = self.get_negative_points()
        ## downsample without replacement so no point is audited twice
        if num_cases and num_cases < len(denied_individuals):
            denied_individuals = np.random.choice(denied_individuals,
                                                  num_cases,
                                                  replace=False)

        if not any(self.actionset.aligned):
            self.actionset.align(self.coefficients)

        # The indices above are positional. Index an array view of X so that a
        # DataFrame X is read by row rather than by column label.
        X_rows = np.asarray(self.X)

        ## run flipsets
        flipsets = {}
        now = time.time()
        for idx, i in enumerate(denied_individuals):
            if idx % 50 == 0:
                print('finished %d points in %f...' % (idx, time.time() - now))
                now = time.time()

            x = X_rows[i]
            fb = RecourseBuilder(optimizer=self.optimizer,
                                 coefficients=self.coefficients,
                                 intercept=self.intercept,
                                 action_set=self.actionset,
                                 x=x)

            output = fb.fit()
            # NOTE(review): a falsy total_cost (e.g. 0) falls through to
            # max_cost; confirm this is intended.
            flipsets[i] = output.get('total_cost') or output.get('max_cost')

        return flipsets
Esempio n. 8
0
from recourse.paths import *
from recourse.action_set import ActionSet
from recourse.flipset import Flipset

# Script setup: load the processed German data, build a restricted action set,
# fit a logistic regression and pick the first point it predicts as negative.
data_name = 'german'
# test_dir is presumably provided by the star import from recourse.paths
data_file = test_dir / ('%s_processed.csv' % data_name)

## load dataset
data_df = pd.read_csv(data_file)
# the first column of the file is taken as the outcome
outcome_name = data_df.columns[0]
y = data_df[outcome_name]
# features: everything except the outcome and three excluded columns
X = data_df.drop(
    [outcome_name, 'Gender', 'PurposeOfLoan', 'OtherLoansAtStore'], axis=1)

# setup actionset
action_set = ActionSet(X=X)
# attributes that actions may not change
immutable_attributes = [
    'Age', 'Single', 'JobClassIsSkilled', 'ForeignWorker', 'OwnsHouse',
    'RentsHouse'
]
action_set[immutable_attributes].mutable = False
# features restricted to a single direction of change
action_set['CriticalAccountOrLoansElsewhere'].step_direction = -1
action_set['CheckingAccountBalance_geq_0'].step_direction = 1

# fit classifier
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
clf.fit(X, y)

# positions of the points predicted as negative
# NOTE(review): assumes the negative class is encoded below 0 (e.g. -1); with
# 0/1 labels nothing is selected and denied_idx[0] raises IndexError - confirm.
denied_idx = np.flatnonzero(clf.predict(X) < 0)
i = denied_idx[0]
Esempio n. 9
0
# defaults used when no dataset-specific configuration applies
custom_bounds = None
immutable_variables = []

if settings['data_name'] == 'credit':

    # demographic indicators plus every age- or overdue-related variable
    immutable_names = ['Female', 'Single', 'Married'] + [
        name for name in data['variable_names']
        if 'Age' in name or 'Overdue' in name
    ]
    default_bounds = (0.1, 99.9, 'percentile')
    custom_bounds = {name: (0, 100, 'p') for name in ('Female', 'Married')}

    # keep only the immutable names that exist in this dataset
    data['immutable_variable_names'] = list(
        filter(lambda n: n in data['variable_names'], immutable_names))

    action_set = ActionSet(X=data['X'],
                           custom_bounds=custom_bounds,
                           default_bounds=default_bounds)
    action_set[data['immutable_variable_names']].mutable = False

    action_set['EducationLevel'].step_direction = 1

    # amount fields move in absolute steps of 5; their grids are then rebuilt
    payment_fields = [
        name for name in data['variable_names'] if 'Amount' in name
    ]
    action_set[payment_fields].step_type = 'absolute'
    action_set[payment_fields].step_size = 5

    for p in payment_fields:
        action_set[p].update_grid()

#### Initialize Model Files ####
# NOTE: the file object opened here is never closed explicitly. pickle.load
# can execute arbitrary code, so settings['model_file'] must be a trusted file.
model_stats = pickle.load(open(settings['model_file'], 'rb'))
# `intercept` and `clf_age_limited` are defined outside this excerpt
intercept['downsampled'] = clf_age_limited.intercept_[0]

# utilization bounded
# RealEstate should be positive
# numtimes90 days > 0
# monthly income >0
# debt
# score cutoff: points scoring at or below p are treated as denied (see below)
p = .98
# generate flipsets
for dataset in ['full', 'downsampled']:
    # score column for this dataset variant in exp_df_sample
    y_col = 'y_%s_score' % dataset
    scores = exp_df_sample[y_col]
    # p = scores.median()
    # index labels of the rows whose score is <= p
    denied_individuals = scores.loc[lambda s: s <= p].index
    # actionset
    action_set = ActionSet(X=X_audit_holdout)
    action_set['age'].mutable = False
    action_set['NumberOfDependents'].mutable = False
    action_set['DebtRatio'].step_direction = -1
    # action_set['NumberOfTime60-89DaysPastDueNotWorse'].step_direction = -1
    # align against the coefficients of this dataset variant
    action_set.align(coefficients=coefficients[dataset])

    # per-dataset progress counter and result store
    idx = 0
    flipsets = {}
    # import statement runs on every pass of the outer loop
    import time
    now = time.time()
    for i in denied_individuals:
        # progress report; `now` is reset so each report times one batch
        # NOTE(review): idx is not incremented in the visible part of this
        # loop - the remainder of the body lies outside this excerpt.
        if idx % 100 == 0:
            print('finished %d points in %f...' % (idx, time.time() - now))
            now = time.time()
Esempio n. 11
0
#### Initialize Actionset ####

# defaults used when no dataset-specific configuration applies
default_bounds = (0.1, 99.9, 'percentile')
custom_bounds = None
immutable_variables = []


if settings['data_name'] == 'credit':

    # demographic indicators plus every age- or overdue-related variable
    immutable_names = ['Female', 'Single', 'Married']
    immutable_names.extend(
        name for name in data['variable_names']
        if 'Age' in name or 'Overdue' in name
    )
    default_bounds = (0.1, 99.9, 'percentile')
    custom_bounds = {
        'Female': (0, 100, 'p'),
        'Married': (0, 100, 'p'),
    }

    # keep only the immutable names that exist in this dataset
    data['immutable_variable_names'] = list(
        filter(lambda n: n in data['variable_names'], immutable_names))

    action_set = ActionSet(
        X=data['X'],
        custom_bounds=custom_bounds,
        default_bounds=default_bounds,
    )
    action_set[data['immutable_variable_names']].mutable = False

    # amount fields move in absolute steps of 5; their grids are then rebuilt
    payment_fields = [
        name for name in data['variable_names'] if 'Amount' in name
    ]
    action_set[payment_fields].step_type = 'absolute'
    action_set[payment_fields].step_size = 5
    for p in payment_fields:
        action_set[p].update_grid()

    # features restricted to a single direction of change
    for _feature, _direction in (
            ('EducationLevel', 1),
            ('MaxBillAmountOverLast6Months', -1),
            ('MaxPaymentAmountOverLast6Months', 1),
    ):
        action_set[_feature].step_direction = _direction



#### Initialize Model Files ####
def as_result_file(name, extension = 'pdf', header = file_header):
    """Build the path of a result file inside demo_results_dir.

    The file name is '<name>.<extension>'; `header` is accepted but unused.
    """
    result_name = '%s.%s' % (name, extension)
    return os.path.join(demo_results_dir, result_name)

## load the data, then split the outcome column from the features
german_df = pd.read_csv(data_file).reset_index(drop=True)
# german_df = german_df.assign(isMale=lambda df: (df['Gender']=='Male').astype(int))#.drop(['PurposeOfLoan', 'Gender', 'OtherLoansAtStore'], axis=1)
y = german_df['GoodCustomer']
X = german_df.drop(
    ['GoodCustomer', 'PurposeOfLoan', 'Gender', 'OtherLoansAtStore'], axis=1)

## build the action set on a gender-balanced resample
# each row is weighted by 1 / (size of its gender group)
gender_weight = (
    german_df.assign(c=1)
    .groupby('Gender')['c']
    .transform(lambda s: s*1./len(s))
)
X_gender_balanced = X.sample(n=3 * len(X), replace=True, weights=gender_weight)
action_set = ActionSet(X=X_gender_balanced)

# attributes that actions may not change
for _immutable_name in ('Age', 'Single', 'JobClassIsSkilled', 'ForeignWorker',
                        'OwnsHouse', 'RentsHouse'):
    action_set[_immutable_name].mutable = False

# features restricted to a single direction of change
action_set['CriticalAccountOrLoansElsewhere'].step_direction = -1
action_set['CheckingAccountBalance_geq_0'].step_direction = 1
# action_set['isMale'].mutable = False

clf = LogisticRegression(max_iter=1000, solver='lbfgs')
grid = GridSearchCV(
    clf, param_grid={'C': np.logspace(-4, 3)},
    cv=10,
    scoring='roc_auc',