Ejemplo n.º 1
0
class Fair_rew_NN():
    """Pipeline that chains a ``Reweighing`` step with a ``FairClass`` model.

    The first protected attribute column is removed from the feature matrix
    before the data reaches the model, both when fitting and when predicting.

    NOTE(review): the dataset returned by the reweighing step is only used
    for its ``features`` and ``labels``; its instance weights are never read
    here, so they do not reach ``self.model.fit`` -- confirm this is intended.
    """

    def __init__(self, un_gr, pr_gr, inp_size, num_layers_y, step_y):
        """Build the reweigher and the underlying model.

        Args:
            un_gr: forwarded as the first argument of ``Reweighing``.
            pr_gr: forwarded as the second argument of ``Reweighing``.
            inp_size, num_layers_y, step_y: forwarded positionally to
                ``FairClass``.
        """
        self.model_reweight = Reweighing(un_gr, pr_gr)
        self.model = FairClass(inp_size, num_layers_y, step_y)

    @staticmethod
    def _as_float_tensor(array):
        """Convert *array* to a ``torch.FloatTensor``."""
        return torch.tensor(array).type('torch.FloatTensor')

    @classmethod
    def _inputs_without(cls, dataset, column_name):
        """Return ``dataset.features`` minus *column_name*, as a float tensor."""
        position = dataset.feature_names.index(column_name)
        kept = np.delete(dataset.features, position, 1)
        return cls._as_float_tensor(kept)

    def fit(self, data, labels, prot):
        """Fit the reweigher on the data, then train the model.

        Args:
            data: passed as ``df`` to ``BinaryLabelDataset``.
            labels: passed as ``label_names`` to ``BinaryLabelDataset``.
            prot: passed as ``protected_attribute_names``; ``prot[0]`` is the
                feature column removed before training. Stored on the
                instance as ``self.prot`` for use in ``predict_proba``.
        """
        dataset = BinaryLabelDataset(df=data,
                                     label_names=labels,
                                     protected_attribute_names=prot)
        self.prot = prot
        reweighted = self.model_reweight.fit_transform(dataset)
        inputs = self._inputs_without(reweighted, prot[0])
        targets = self._as_float_tensor(reweighted.labels)
        self.model.fit(inputs, targets)

    def predict_proba(self, data_test):
        """Return ``self.model.predict_proba`` on the de-protected features.

        Args:
            data_test: object accepted by ``self.model_reweight.transform``
                whose result exposes ``feature_names`` and ``features``.
        """
        transformed = self.model_reweight.transform(data_test)
        inputs = self._inputs_without(transformed, self.prot[0])
        return self.model.predict_proba(inputs)
Ejemplo n.º 2
0
class Fair_rew_RF():
    """Random forest trained on reweighed data without the protected column.

    ``fit`` runs the ``Reweighing`` pre-processing step, removes the first
    protected attribute from the feature matrix and trains a
    ``RandomForestClassifier`` using the instance weights produced by the
    reweighing step as per-sample weights.
    """

    def __init__(self, un_gr, pr_gr, n_est=100, min_sam_leaf=25):
        """Build the reweigher and the forest.

        Args:
            un_gr: forwarded as the first argument of ``Reweighing``.
            pr_gr: forwarded as the second argument of ``Reweighing``.
            n_est: ``n_estimators`` of the random forest.
            min_sam_leaf: ``min_samples_leaf`` of the random forest.
        """
        self.model_reweight = Reweighing(un_gr, pr_gr)
        self.model = RandomForestClassifier(n_estimators=n_est,
                                            min_samples_leaf=min_sam_leaf)

    def fit(self, data, labels, prot):
        """Reweigh the training data and fit the forest on it.

        Args:
            data: passed as ``df`` to ``BinaryLabelDataset``.
            labels: passed as ``label_names`` to ``BinaryLabelDataset``.
            prot: passed as ``protected_attribute_names``; ``prot[0]`` is the
                feature column removed before training. Stored on the
                instance as ``self.prot`` for use in ``predict_proba``.
        """
        ds = BinaryLabelDataset(df=data,
                                label_names=labels,
                                protected_attribute_names=prot)
        self.prot = prot
        x = self.model_reweight.fit_transform(ds)
        index = x.feature_names.index(prot[0])
        x_train = np.delete(x.features, index, 1)
        # The forest expects a 1-D target vector.
        y_train = x.labels.ravel()
        # Reweighing only changes the instance weights of the dataset; they
        # must be handed to the estimator explicitly, otherwise the
        # pre-processing step has no influence on the trained model.
        self.model.fit(x_train, y_train, sample_weight=x.instance_weights)

    def predict_proba(self, data_test):
        """Return column 1 of the forest's ``predict_proba`` output.

        Args:
            data_test: object accepted by ``self.model_reweight.transform``
                whose result exposes ``feature_names`` and ``features``.
        """
        x = self.model_reweight.transform(data_test)
        index = x.feature_names.index(self.prot[0])
        x_test = np.delete(x.features, index, 1)
        y = self.model.predict_proba(x_test)[:, 1]
        return y
# Tail of an evaluation cell whose beginning is outside this excerpt.
# Store the predictions as the "two_year_recid" column of tmp.
tmp["two_year_recid"] = pred_

# Fairness metrics come from helpers defined outside this excerpt.
# NOTE(review): "tpr" presumably stands for true positive rate -- confirm
# against compute_metrics.
parity_diff = compute_statistical_parity(tmp, unpriv_group, priv_group)
tpr_diff, tpr_priv, tpr_unpriv = compute_metrics(t_data, pred_, unpriv_group,
                                                 priv_group)
# Record this run under the "Without Race" label together with the scores
# printed below (ps, rs, fs, as_) and the fairness metrics computed above.
all_results.append(("Without Race", ps, rs, fs, as_, parity_diff, tpr_diff,
                    tpr_priv, tpr_unpriv))

# Report precision, recall, F1 and accuracy for this run.
print(
    f"The precision is {ps}.\nThe recall is {rs}.\nThe F1 is {fs}.\nThe accuracy is {as_}."
)

# Bundle X_, y_ and the predictions into a single list.
bag = [X_, y_, pred_]
# %%
# Fit a Reweighing transformer on the training data and apply it to that
# same data.
log_reg_RW = Reweighing(unpriv_group, priv_group).fit(train_data)
transformed_data = log_reg_RW.transform(train_data)
# Show mean and standard deviation of the instance weights before
# (train_data) and after (transformed_data) the transform.
display(train_data.instance_weights.mean(), train_data.instance_weights.std())
display(transformed_data.instance_weights.mean(),
        transformed_data.instance_weights.std())
# Take the first element returned by convert_to_dataframe() and attach the
# transformed instance weights to it as a "weights" column.
t_data = train_data.convert_to_dataframe()[0]
t_data["weights"] = transformed_data.instance_weights
# Split rows on the race column: 0 -> t_data_blacks, 1 -> t_data_whites.
# NOTE(review): the 0/1 encoding is implied only by the variable names;
# confirm against the dataset definition.
t_data_blacks = t_data[t_data.race == 0]
t_data_whites = t_data[t_data.race == 1]
# Summary statistics of the weights within each group.
print(t_data_blacks.weights.describe())
print(t_data_whites.weights.describe())
# Box plot of the weights grouped by race.
t_data.boxplot(["weights"], by="race", figsize=(10, 5))
plt.show()

# %%
# Take the first element returned by convert_to_dataframe() and split it
# into X (every column except "two_year_recid") and y (that column).
data = train_data.convert_to_dataframe()[0]
X, y = data.drop(["two_year_recid"], axis=1), data["two_year_recid"]