Example #3
def recourse_builder(request, classifier, action_set):
    action_set.align(classifier)
    rb = RecourseBuilder(solver=request.param,
                         action_set=action_set,
                         clf=classifier)

    return rb
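
# A minimal sketch of a test that could consume the fixture above; the `data`
# fixture and its 'X' DataFrame are assumptions, not part of the source.
def test_recourse_builder_fit(recourse_builder, data):
    # point the builder at a feature vector, then solve the recourse MIP
    recourse_builder.x = data['X'].values[0]
    info = recourse_builder.fit()
    assert 'actions' in info and 'cost' in info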
Example #4
def recourse_builder_cbc(classifier, action_set):
    action_set.align(classifier)
    rb = RecourseBuilder(solver=_SOLVER_TYPE_CBC,
                         action_set=action_set,
                         clf=classifier)

    return rb
Example #5
def my_recourse_builder_cplex_fake(my_actionset_fake):
    """CPLEX Recourse builder with fake values."""
    x = [1, 1, 1]
    coefficients = [1, 2, 3]
    return RecourseBuilder(solver="cplex",
                           action_set=my_actionset_fake,
                           coefficients=coefficients,
                           x=x)
def get_flipset_solutions(model,
                          data,
                          action_set,
                          mip_cost_type='max',
                          scaler=None,
                          print_flag=True):
    """
    Run a basic audit of a model on the training dataset.

    :param model:
    :param data:
    :param action_set:
    :param mip_cost_type:
    :param scaler:
    :return:
    """

    if scaler is not None:
        yhat = model.predict(data['X_scaled'])
        coefficients, intercept = undo_coefficient_scaling(
            coefficients=np.array(model.coef_).flatten(),
            intercept=model.intercept_[0],
            scaler=scaler)
    else:
        yhat = model.predict(data['X'])
        coefficients = np.array(model.coef_).flatten()
        intercept = model.intercept_[0]

    action_set.align(coefficients)

    # audit the points that are currently assigned the negative class
    audit_results = []
    predicted_neg = np.flatnonzero(yhat < 1)

    if predicted_neg.size > 0:

        U = data['X'].iloc[predicted_neg].values
        fb = RecourseBuilder(coefficients=coefficients,
                             intercept=intercept,
                             action_set=action_set,
                             x=U[0],
                             mip_cost_type=mip_cost_type)

        # basic audit
        start_time = time.time()
        if print_flag:
            for i, u in enumerate(U):
                fb.x = u
                info = fb.fit()
                audit_results.append(info)
                print_log('cost[%06d] = %1.2f' % (i, info['total_cost']))
        else:
            for i, u in enumerate(U):
                fb.x = u
                audit_results.append(fb.fit())

        print_log('runtime: solved %i IPs in %1.1f seconds' %
                  (len(U), time.time() - start_time))

    return audit_results
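
# A hedged usage sketch of the audit above; `clf`, `X_df`, and `action_set`
# are assumed to come from the surrounding training pipeline.
results = get_flipset_solutions(model=clf,
                                data={'X': X_df},
                                action_set=action_set,
                                mip_cost_type='max',
                                scaler=None,
                                print_flag=False)
costs = [info['total_cost'] for info in results]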
def test_rb_onehot_encoding(data, solver):

    if len(data['categorical_names']) == 1:

        # pick only the indicator variables
        names = data['onehot_names']
        k = len(names)
        X = data['X'][names]
        assert np.all(X.sum(axis=1) == 1)

        # setup a classifier of the form
        # w = [3, -1, -1, ..., -1]
        # t = -1
        # score(x[0] = 1) =  3 - 1 =  2 -> yhat = +1
        # score(x[j] = 1) = -1 - 1 = -2 -> yhat = -1 for j = 1, 2, ..., k-1
        coefs = -np.ones(k)
        coefs[0] = 3.0
        intercept = -1.0

        # setup action set
        a = ActionSet(X)
        a.add_constraint('subset_limit', names=names, lb=0, ub=1)
        a.set_alignment(coefficients=coefs, intercept=intercept)
        rb = RecourseBuilder(action_set=a,
                             coefficients=coefs,
                             intercept=intercept,
                             solver=solver)
        for j in range(1, k):

            x = np.zeros(k)
            x[j] = 1.0
            assert rb.score(x) < 0

            # set point
            rb.x = x

            # find optimal action
            info = rb.fit()
            actions = info['actions']

            # validate solution
            x_new = x + actions
            assert rb.score(x_new) > 0
            assert np.isclose(actions[j], -1.0)
            assert np.isclose(np.sum(x_new), 1.0)
class RecourseAuditor(object):
    """
    Compute feasibility and cost of recourse over a sample of points that were denied access.
    (i.e. this method will not be run on data points that are already qualifying, (eg. y_pred > 0).
    """

    _default_print_flag = True

    def __init__(self, action_set, **kwargs):
        """
        :param action_set: ActionSet  for features
        :param clf: scikit-learn linear classifier
        :param coefficients: vector of coefficients (only used when clf is not specified)
        :param intercept: set to 0.0 by default (only used when clf is not specified)
        :param solver: valid MIP solver
        """

        # action_set
        assert isinstance(action_set, ActionSet)
        self.action_set = action_set

        # attach coefficients
        self.coefficients, self.intercept = parse_classifier_args(**kwargs)

        # align coefficients to action set
        self.action_set.align(self.coefficients)

        # set solver
        self.solver = kwargs.get('solver', DEFAULT_SOLVER)

        # setup recourse problem
        self.builder = RecourseBuilder(coefficients=self.coefficients,
                                       intercept=self.intercept,
                                       action_set=self.action_set,
                                       solver=self.solver)

        self._print_flag = kwargs.get('print_flag', self._default_print_flag)

    @property
    def print_flag(self):
        return self._print_flag

    @print_flag.setter
    def print_flag(self, flag):
        if flag is None:
            self._print_flag = bool(self._default_print_flag)
        elif isinstance(flag, bool):
            self._print_flag = bool(flag)
        else:
            raise AttributeError('print_flag must be boolean or None')

    def audit(self, X, y_desired=1):
        """
        evaluate cost and feasibility of recourse for for each point in X
        that is not assigned a desired outcome

        :param X: feature matrix (np.array or pd.DataFrame)
        :param y_desired: desired label (+1 by default)
        :return: pd.DataFrame containing the feasibility and cost of recourse for each point in X
                 rows that already attain desired outcome have entries: feasible = NaN & cost = NaN
                 rows that are certified to have no recourse have entries: feasible = False & cost = Inf
        """

        if isinstance(X, pd.DataFrame):
            raw_index = X.index.tolist()
            X = X.values
        else:
            raw_index = list(range(X.shape[0]))

        assert isinstance(X, np.ndarray)
        assert X.ndim == 2
        assert X.shape[0] >= 1
        assert X.shape[1] == len(self.coefficients)
        assert np.isfinite(X).all()
        assert float(y_desired) in {1.0, -1.0, 0.0}

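        # np.unique with return_inverse deduplicates rows: U holds the unique
        # feature vectors, and distinct_idx maps each original row of X back to
        # its row in U, so every distinct point is audited only once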
        U, distinct_idx = np.unique(X, axis=0, return_inverse=True)
        scores = U.dot(self.coefficients)
        if y_desired > 0:
            audit_idx = np.less(scores, -self.intercept)
        else:
            audit_idx = np.greater_equal(scores, -self.intercept)
        audit_idx = np.flatnonzero(audit_idx)

        # solve recourse problem
        output = []
        # passing total keeps tqdm from misbehaving in ipython notebooks
        pbar = tqdm(total=len(audit_idx))
        for idx in audit_idx:
            self.builder.x = U[idx, :]
            info = self.builder.fit()
            info['idx'] = idx
            output.append({k: info[k] for k in ['feasible', 'cost', 'idx']})
            pbar.update(1)
        pbar.close()

        # add in points that were not denied recourse
        df = pd.DataFrame(output)
        df = df.set_index('idx')

        # include unique points that attain desired label already
        df = df.reindex(range(U.shape[0]))

        # include duplicates of original points
        df = df.iloc[distinct_idx]
        df = df.reset_index(drop=True)
        df.index = raw_index
        return df
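
# A minimal usage sketch of RecourseAuditor, assuming a fitted scikit-learn
# linear classifier `clf` and a feature matrix `X` over the same features as
# the action set.
auditor = RecourseAuditor(action_set, clf=clf)
audit_df = auditor.audit(X)                            # one row per point in X
no_recourse = audit_df[audit_df['feasible'] == False]  # certified no recourse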
Example #9
action_set[immutable_attributes].mutable = False
action_set['CriticalAccountOrLoansElsewhere'].step_direction = -1
action_set['CheckingAccountBalance_geq_0'].step_direction = 1

# fit classifier, get median score, and get denied individuals.
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
clf.fit(X, y)
coefficients = clf.coef_[0]
intercept = clf.intercept_[0]
scores = pd.Series(clf.predict_proba(X)[:, 1])
p = scores.median()
denied_individuals = scores.loc[lambda s: s <= p].index

idx = denied_individuals[0]
x = X.values[idx]

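# For a logistic model, predict_proba(x) > p is equivalent to
# score(x) > log(p / (1 - p)), so subtracting the log-odds of p from the
# intercept makes the builder search for actions that cross the threshold p
# (here the median score) rather than 0.5.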
## CPLEX
fb_cplex = RecourseBuilder(solver=_SOLVER_TYPE_CPX,
                           coefficients=coefficients,
                           intercept=intercept - (np.log(p / (1. - p))),
                           action_set=action_set,
                           x=x)
fb_cplex.fit()

## CBC
fb_cbc = RecourseBuilder(solver=_SOLVER_TYPE_CBC,
                         coefficients=coefficients,
                         intercept=intercept - (np.log(p / (1. - p))),
                         action_set=action_set,
                         x=x)
fb_cbc.fit()
class Flipset(object):
    """
    List of actions that will flip the predicted value of a classifier from x
    """
    _valid_enumeration_types = VALID_ENUMERATION_TYPES
    _valid_cost_types = VALID_MIP_COST_TYPES

    df_column_names = [
        'cost', 'size', 'features', 'feature_idx', 'x', 'x_new', 'score_new',
        'yhat_new', 'feasible', 'flipped'
    ]

    def __init__(self, x, action_set, solver=DEFAULT_SOLVER, **kwargs):

        # attach action set
        assert isinstance(action_set, ActionSet)
        self.action_set = action_set
        self._n_variables = len(action_set)
        self._variable_names = action_set.name
        self._solver = solver

        # attach feature vector
        assert isinstance(x, (list, np.ndarray))
        self._x = np.array(x, dtype=np.float_).flatten()

        # attach coefficients
        self._coefs, self._intercept = parse_classifier_args(**kwargs)

        # initialize Flipset attributes
        self._builder = kwargs.get('builder')
        self._items = []
        self._df = pd.DataFrame(columns=Flipset.df_column_names, dtype=object)
        self._sort_args = {
            'by': ['size', 'cost', 'score_new'],
            'inplace': True,
            'axis': 0
        }

    def __len__(self):
        """
        :return: # of items in the flipset
        """
        return len(self._items)

    def __str__(self):
        return str(self._df[['cost', 'size', 'features', 'x', 'x_new']])

    def __repr__(self):
        s = [
            'Flipset with %d items' % len(self),
            'x: %r' % self._x,
            'w: %r' % self._coefs,
            'items: %r' % self._items
        ]
        return '\n'.join(s)

    ### properties ###
    @property
    def x(self):
        """
        :return: feature vector
        """
        return self._x

    @property
    def items(self):
        """
        Dictionary representation of Flipset
        This is a list containing mildly processed output from recourse.builder
        """
        return self._items

    @property
    def df(self):
        """
        Pandas DataFrame representation of Flipset
        Each row represents a different action to flip the prediction
        Rows are sorted according to the last arguments passed to Flipset.sort()

        DESCRIPTION OF COLUMNS
        ----------------------
        cost:           cost of the action
        size:           # of altered features
        features:       names of the altered features
        feature_idx:    column indices of the altered features
        x:              values of x[j] for j in feature_idx
        x_new:          values of x[j] + a[j] for j in feature_idx
        score_new:      value of the score function at x_new = x + a, i.e., w0 + w.dot(x + a)
        yhat_new:       value of the prediction at x_new, i.e., f(score_new)
        feasible:       True if x + a is a feasible action
        flipped:        True if f(x) != f(x + a)
        """
        return self._df

    @property
    def yhat(self):
        """
        :return: predicted label at x (sign of the score), so it can be compared
                 against the output of predict()
        """
        return np.sign(self._intercept + np.dot(self._coefs, self._x))

    def predict(self, actions=None):
        return np.sign(self.score(actions=actions))

    def score(self, actions=None):
        if actions is not None:
            return self._intercept + np.dot(self._coefs, self._x + actions)
        else:
            return self._intercept + np.dot(self._coefs, self._x)

    #### API functions ####

    def populate(self,
                 total_items=10,
                 enumeration_type='distinct_subsets',
                 cost_type='local',
                 time_limit=None,
                 node_limit=None,
                 display_flag=None):
        """
        Generates a list of actions to flip the predicted value of the linear classifier from feature vector x.

        :param total_items: maximum # of actions to generate
                            set as float('inf') to enumerate all possible actions

        :param enumeration_type: enumeration algorithm to use for the flipset
                            must be a string in Flipset.valid_enumeration_types
                            - 'distinct_subsets'
                            - 'mutually_exclusive'

        :param cost_type: cost function to use for Flipset generation
                          must be a string in Flipset.valid_cost_types
                            options include:
                            - local

        :param time_limit: max # of seconds to spend before stopping the solver at each iteration

        :param node_limit: max # of branch and bound nodes to process before stopping the solver at each iteration

        :param display_flag: True to display solver progress during enumeration

        :return: self, with the newly enumerated items appended to the flipset
        """
        assert enumeration_type in self._valid_enumeration_types, \
            'enumeration_type must be one of %r' % self._valid_enumeration_types

        assert cost_type in self._valid_cost_types, \
            'cost_type must be one of %r' % self._valid_cost_types

        if self._builder is None:
            self._builder = RecourseBuilder(action_set=self.action_set,
                                            x=self.x,
                                            coefficients=self._coefs,
                                            intercept=self._intercept,
                                            mip_cost_type=cost_type,
                                            solver=self._solver)

        items = self._builder.populate(total_items=total_items,
                                       enumeration_type=enumeration_type,
                                       time_limit=time_limit,
                                       node_limit=node_limit,
                                       display_flag=display_flag)
        self._add(items)
        return self

    def sort(self, **kwargs):
        """
        Reorders the items in the Flipset dataframe.
        The sort arguments are saved and reused by subsequent calls to sort() with no arguments.
        :param kwargs: 'by' with a list of fields to sort on, plus any keyword arguments accepted by pd.DataFrame.sort_values
        :return:
        """
        if len(kwargs) == 0:
            self._df.sort_values(**self._sort_args)
            return

        if 'by' in kwargs:
            sort_names = kwargs['by']
        else:
            sort_names = list(kwargs.keys())

        assert isinstance(sort_names, list)
        assert len(sort_names) > 0
        for s in sort_names:
            assert isinstance(s, str)
            assert s in self._df.columns

        sort_args = {
            'by': sort_names,
            'inplace': kwargs.get('inplace', True),
            'axis': kwargs.get('axis', 0),
        }
        self._df.sort_values(**sort_args)
        self._sort_args = sort_args

    def view(self):
        """
        returns the Flipset as a pandas DataFrame
        :return: pd.DataFrame with one row per action
        """
        return self._df

    def to_flat_df(self):
        """Flatten out the actionsets in the flipset to product either a latex or HTML representation."""
        self.sort()
        tex_columns = ['features', 'x', 'x_new']
        tex_df = self._df[tex_columns]

        # split components for each item
        tex_df = tex_df.reset_index().rename(columns={'index': 'item_id'})
        df_list = []
        for n in tex_columns:
            tmp = tex_df.set_index(['item_id'])[n].apply(pd.Series).stack()
            tmp = tmp.reset_index().rename(columns={'level_1': 'var_id'})
            tmp_name = tmp.columns[-1]
            tmp = tmp.rename(columns={tmp_name: n})
            df_list.append(tmp)

        # combine into a flattened list
        flat_df = df_list[0]
        for k in range(1, len(df_list)):
            flat_df = flat_df.merge(df_list[k])

        # drop the merge index
        flat_df = flat_df.drop(columns=['var_id'])

        # index items by item_id
        flat_df = flat_df.sort_values(by='item_id')
        flat_df = flat_df.rename(columns={'item_id': 'item'})
        return flat_df.set_index('item')

    def to_latex(self, name_formatter='\\textit'):
        """
        converts the current Flipset to a LaTeX table
        :param name_formatter: LaTeX command used to wrap each feature name (e.g., '\\textit')
        :return: str containing the LaTeX table
        """
        flat_df = self.to_flat_df()

        # add another column for the latex arrow symbol
        idx = flat_df.columns.tolist().index('x_new')
        flat_df.insert(loc=idx,
                       column='to',
                       value=['longrightarrow'] * len(flat_df))

        # name headers
        flat_df = flat_df.rename(
            columns={
                'features': '\\textsc{Feature Subset}',
                'x': '\\textsc{Current Values}',
                'x_new': '\\textsc{Required Values}'
            })

        # get raw tex table
        table = flat_df.to_latex(multirow=True,
                                 index=True,
                                 escape=False,
                                 na_rep='-',
                                 column_format='rlccc')

        # manually wrap names with a formatter function
        if name_formatter is not None:
            for v in self._variable_names:
                table = table.replace(v, '%s{%s}' % (name_formatter, v))

        # add the backslash for the arrow
        table = table.replace('longrightarrow', '$\\longrightarrow$')

        # minor embellishments: clear the leftover 'to' column header and drop
        # two unneeded lines below the header
        table = table.split('\n')
        table[2] = table[2].replace('to', '')
        table[2] = table[2].replace('{}', '')
        table.pop(3)
        table.pop(3)
        return '\n'.join(table)

    def to_html(self):
        def _color_white_or_gray(row):
            color = 'white' if row.name[0] % 2 == 0 else 'lightgray'
            res = 'background-color: %s' % color
            return [res] * len(row)

        flat_df = self.to_flat_df()

        # add another column for the latex arrow symbol
        idx = flat_df.columns.tolist().index('x_new')
        flat_df.insert(loc=idx, column='to', value=['&#8594;'] * len(flat_df))

        idx = (pd.DataFrame(flat_df.index).assign(
            row=lambda df: df.groupby('item').cumcount().pipe(lambda s: s + 1)
        ).pipe(lambda df: list(zip(df['item'], df['row']))))
        idx = pd.MultiIndex.from_tuples(idx)
        flat_df.index = idx
        html = (flat_df.style.set_table_styles([{
            "selector":
            "tr",
            "props": [('background-color', 'white')]
        }]).apply(_color_white_or_gray, axis=1).render())
        return html

    #### item management ####
    def _add(self, items):
        """
        :param items: new item (dict) or list of items to add to the flipset
        :return:
        """
        if isinstance(items, dict):
            items = [items]
        assert isinstance(items, list)
        items = list(map(lambda i: self._validate_item(i), items))
        self._items.extend(items)
        self._add_to_df(items)

    def _validate_item(self, item):
        """
        checks item to be added to the current Flipset
        :param item: raw flipset item
        :return: item in correct format
        """
        assert isinstance(item, dict)
        required_fields = ['feasible', 'actions', 'cost']
        for k in required_fields:
            assert k in item, 'item missing field %s' % k
        item['actions'] = self._validate_action(item['actions'])
        assert item['cost'] > 0.0, 'total cost must be positive'
        assert item['feasible'], 'item must be feasible'
        return item

    def _validate_action(self, a):
        """
        checks action vector to be added to the current Flipset
        :param a: action vector
        :return: a or AssertionError
        """
        a = np.array(a, dtype=np.float_).flatten()
        assert len(a) == self._n_variables, \
            'action vector must have %d elements' % self._n_variables
        assert np.isfinite(a).all(), 'actions must be finite'
        assert np.count_nonzero(a) >= 1, 'at least one action element must be non zero'
        assert np.not_equal(self.yhat, self.predict(a)), \
            'actions do not flip the prediction from %d' % self.yhat
        return a

    def _add_to_df(self, items):
        if len(items) > 0:
            row_data = list(map(lambda item: self._item_to_df_row(item), items))
            # DataFrame.append was removed in pandas 2.0; concat is the supported replacement
            self._df = pd.concat([self._df, pd.DataFrame(row_data)],
                                 ignore_index=True,
                                 sort=True)[self._df.columns.tolist()]
            self.sort()

    def _item_to_df_row(self, item):
        """
        converts item to a row in the data frame
        :param item:
        :return:
        """
        x = self.x
        a = item['actions']
        h = self.predict(a)
        nnz_idx = np.flatnonzero(a)
        row = {
            'cost': float(item['cost']),
            'size': len(nnz_idx),
            'features': [self._variable_names[j] for j in nnz_idx],
            'feature_idx': nnz_idx,
            'x': x[nnz_idx],
            'x_new': x[nnz_idx] + a[nnz_idx],
            'score_new': self.score(a),
            'yhat_new': h,
            'feasible': item['feasible'],
            'flipped': np.not_equal(h, self.yhat),
        }

        return row
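
# A minimal end-to-end sketch of Flipset, assuming a denied point `x`, a fitted
# linear classifier `clf`, and an `action_set` aligned as in the examples above.
fs = Flipset(x=x, action_set=action_set, clf=clf)
fs.populate(total_items=5, enumeration_type='distinct_subsets')
print(fs.view())        # one row per action, sorted by size / cost
latex_table = fs.to_latex()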
Example #11
def genExp(model_trained, factual_sample, norm_type, dataset_obj):

    start_time = time.time()

    # SIMPLE HACK!!
    # ActionSet() construction demands that all variables have a range to them. In
    # the case of one-hot ordinal variables (e.g., x2_ord_0, x2_ord_1, x2_ord_2),
    # the first sub-category (i.e., x2_ord_0) will always have range (1, 1), failing
    # the requirements of ActionSet(). Therefore, we add a DUMMY ROW to the
    # dataframe, which is a copy of another row (so as not to throw off the range of
    # other attributes), but with a 0 value for all _ord_ variables (might as well
    # do this for all _cat_ variables as well).
    tmp_df = dataset_obj.data_frame_kurz
    sample_row = tmp_df.loc[0].to_dict()
    for attr_name_kurz in dataset_obj.getOneHotAttributesNames('kurz'):
        sample_row[attr_name_kurz] = 0
    # DataFrame.append was removed in pandas 2.0; concat is the supported replacement
    tmp_df = pd.concat([tmp_df, pd.DataFrame([sample_row])], ignore_index=True)

    df = tmp_df
    X = df.loc[:, df.columns != 'y']

    # Enforce that binary and categorical (including ordinal) variables only take on 2 values
    custom_bounds = {
        attr_name_kurz: (0, 100, 'p')
        for attr_name_kurz in np.union1d(
            dataset_obj.getOneHotAttributesNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz'))
    }
    action_set = ActionSet(X=X, custom_bounds=custom_bounds)
    # action_set['x1'].mutable = False # x1 = 'Race'
    # the current implementation only supports any/none actionability
    for attr_name_kurz in dataset_obj.getInputAttributeNames('kurz'):
        attr_obj = dataset_obj.attributes_kurz[attr_name_kurz]
        if attr_obj.actionability == 'none':
            action_set[attr_name_kurz].mutable = False
        elif attr_obj.actionability == 'any':
            continue  # do nothing
        else:
            raise ValueError(
                f'Actionable Recourse does not support actionability type {attr_obj.actionability}'
            )

    # Enforce search over integer-based grid for integer-based variables
    for attr_name_kurz in np.union1d(
            dataset_obj.getIntegerBasedAttributeNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz'),
    ):
        action_set[attr_name_kurz].step_type = "absolute"
        action_set[attr_name_kurz].step_size = 1

    coefficients = model_trained.coef_[0]
    intercept = model_trained.intercept_[0]

    if norm_type == 'one_norm':
        mip_cost_type = 'total'
    elif norm_type == 'infty_norm':
        mip_cost_type = 'max'
    else:
        raise ValueError(
            f'Actionable Recourse does not support norm_type {norm_type}')

    factual_sample_values = list(factual_sample.values())
    # p = .8
    rb = RecourseBuilder(
        solver="cplex",
        coefficients=coefficients,
        intercept=intercept,  # - (np.log(p / (1. - p))),
        action_set=action_set,
        x=factual_sample_values,
        mip_cost_type=mip_cost_type)

    output = rb.fit()
    counterfactual_sample_values = np.add(factual_sample_values,
                                          output['actions'])
    counterfactual_sample = dict(
        zip(factual_sample.keys(), counterfactual_sample_values))

    # factual_sample['y'] = False
    # counterfactual_sample['y'] = True
    counterfactual_sample['y'] = not factual_sample['y']
    counterfactual_plausible = True

    # IMPORTANT: no need to check for integer-based / binary-based plausibility,
    # because those were set above when we said step_type = absolute! Just round!
    for attr_name_kurz in np.union1d(
            dataset_obj.getOneHotAttributesNames('kurz'),
            dataset_obj.getBinaryAttributeNames('kurz')):
        try:
            assert np.isclose(counterfactual_sample[attr_name_kurz],
                              np.round(counterfactual_sample[attr_name_kurz]))
            counterfactual_sample[attr_name_kurz] = np.round(
                counterfactual_sample[attr_name_kurz])
        except AssertionError:
            distance = -1
            counterfactual_plausible = False
            # return counterfactual_sample, distance

    # Perform plausibility-data-type check! Remember, all ordinal variables
    # have already been converted to categorical variables. It is important now
    # to check that 1 (and only 1) sub-category is activated in the resulting
    # counterfactual sample.
    already_considered = []
    for attr_name_kurz in dataset_obj.getOneHotAttributesNames('kurz'):
        if attr_name_kurz not in already_considered:
            siblings_kurz = dataset_obj.getSiblingsFor(attr_name_kurz)
            activations_for_category = [
                counterfactual_sample[attr_name_kurz]
                for attr_name_kurz in siblings_kurz
            ]
            sum_activations_for_category = np.sum(activations_for_category)
            if 'cat' in dataset_obj.attributes_kurz[attr_name_kurz].attr_type:
                if sum_activations_for_category == 1:
                    continue
                else:
                    # print('not plausible, fixing..', end='')
                    # TODO: don't actually return early! Instead compute the actual
                    # distance; fingers crossed that we can show that their method not
                    # only gives counterfactuals at larger distances, but that in many
                    # cases they are also not data-type plausible.
                    # INSTEAD, do below:
                    # Convert to correct categorical/ordinal activations so we can
                    # compute the distance using the already written function.
                    #     Turns out we need to do nothing, because the distance between
                    #     [0,1,0] and anything other than itself (e.g., [1,1,0] or [1,0,1])
                    #     is always 1 :)
                    # continue
                    distance = -1
                    counterfactual_plausible = False
                    # return counterfactual_sample, distance
            elif 'ord' in dataset_obj.attributes_kurz[
                    attr_name_kurz].attr_type:
                # TODO: assert activations are in order...
                # if not, repeat as above...
                for idx in range(int(sum_activations_for_category)):
                    if activations_for_category[idx] != 1:
                        # Convert to correct categorical/ordinal activations so we can
                        # compute the distance using already written function.
                        # Find the max index of 1 in the array, and set everything before that to 1
                        # print('not plausible, fixing..', end='')
                        # max_index_of_1 = np.where(np.array(activations_for_category) == 1)[0][-1]
                        # for idx2 in range(max_index_of_1 + 1):
                        #   counterfactual_sample[siblings_kurz[idx2]] = 1
                        # break
                        distance = -1
                        counterfactual_plausible = False
                        # return counterfactual_sample, distance
            else:
                raise Exception(
                    f'{attr_name_kurz} must include either `cat` or `ord`.')
            already_considered.extend(siblings_kurz)

    # TODO: convert to correct categorical/ordinal activations so we can compute the distance

    # distance = output['cost'] # TODO: this must change / be verified!???
    distance = normalizedDistance.getDistanceBetweenSamples(
        factual_sample, counterfactual_sample, norm_type, dataset_obj)

    # # TODO: post-feasibility check needed???? NO
    # # make plausible by rounding all non-numeric-real attributes to
    # # nearest value in range
    # for idx, elem in enumerate(es_instance):
    #     attr_name_kurz = dataset_obj.getInputAttributeNames('kurz')[idx]
    #     attr_obj = dataset_obj.attributes_kurz[attr_name_kurz]
    #     if attr_obj.attr_type != 'numeric-real':
    #         # round() might give a value that is NOT in plausible.
    #         # instead find the nearest plausible value
    #         es_instance[idx] = min(
    #             list(range(int(attr_obj.lower_bound), int(attr_obj.upper_bound) + 1)),
    #             key = lambda x : abs(x - es_instance[idx])
    #         )

    end_time = time.time()

    return {
        'factual_sample': factual_sample,
        'cfe_sample': counterfactual_sample,
        'cfe_found': True,  # TODO?
        'cfe_plausible': counterfactual_plausible,
        'cfe_distance': distance,
        'cfe_time': end_time - start_time,
    }
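
# A hedged sketch of consuming genExp's return value; `model_trained`,
# `factual_sample`, and `dataset_obj` are assumed to come from the pipeline
# that normally calls this function.
out = genExp(model_trained, factual_sample, 'one_norm', dataset_obj)
print('CFE found at distance %.3f in %.2fs (plausible: %s)' %
      (out['cfe_distance'], out['cfe_time'], out['cfe_plausible']))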