Example #1
def fair_metrics(bst, data, column, thresh):
    tr = list(data.get_label())
    # Predict with the best boosting round found by early stopping
    # (pre-2.0 xgboost API: best_ntree_limit / ntree_limit)
    best_iteration = bst.best_ntree_limit
    pred = bst.predict(data, ntree_limit=best_iteration)
    # Binarize the predicted probabilities at the given threshold
    pred = [1 if p > thresh else 0 for p in pred]
    # Count predictions per group: column == 1 -> group "a", column == 0 -> group "d"
    na0 = na1 = nd0 = nd1 = 0
    for p, c in zip(pred, column):
        if p == 1 and c == 0:
            nd1 += 1
        elif p == 1 and c == 1:
            na1 += 1
        elif p == 0 and c == 0:
            nd0 += 1
        elif p == 0 and c == 1:
            na0 += 1
    # Per-group selection rates (P*1) and their complements (P*0)
    Pa1 = na1 / (na1 + na0)
    Pd1 = nd1 / (nd1 + nd0)
    Pa0 = na0 / (na1 + na0)
    Pd0 = nd0 / (nd1 + nd0)
    # Statistical (demographic) parity difference computed by hand
    dsp_metric = np.abs(Pd1 - Pa1)
    #dsp_metric = np.abs((first-second)/(first+second))
    sr_metric = selection_rate(tr, pred, pos_label=1)
    dpd_metric = demographic_parity_difference(tr,
                                               pred,
                                               sensitive_features=column)
    dpr_metric = demographic_parity_ratio(tr, pred, sensitive_features=column)
    eod_metric = equalized_odds_difference(tr, pred, sensitive_features=column)

    return dsp_metric, sr_metric, dpd_metric, dpr_metric, eod_metric
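
A minimal usage sketch for the fair_metrics helper above. It assumes a pre-2.0 xgboost release (where Booster.best_ntree_limit and the ntree_limit predict argument still exist), that fair_metrics is defined in the same session, and purely illustrative random data:

import numpy as np
import xgboost as xgb
from fairlearn.metrics import (selection_rate, demographic_parity_difference,
                               demographic_parity_ratio, equalized_odds_difference)

# Illustrative data: 200 samples, 5 features, binary label and a binary sensitive column
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = rng.integers(0, 2, size=200)
sensitive = rng.integers(0, 2, size=200)

dtrain = xgb.DMatrix(X[:150], label=y[:150])
dtest = xgb.DMatrix(X[150:], label=y[150:])

# Early stopping sets best_ntree_limit on the Booster (pre-2.0 behaviour)
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=50,
                evals=[(dtest, "valid")], early_stopping_rounds=5, verbose_eval=False)

dsp, sr, dpd, dpr, eod = fair_metrics(bst, dtest, sensitive[150:], thresh=0.5)
print(dsp, sr, dpd, dpr, eod)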
Example #2
def equalized_odds(df_test_encoded, predictions, print_=False):
    eod_sex = equalized_odds_difference(df_test_encoded.earnings, predictions, sensitive_features=df_test_encoded.sex)
    eor_sex = equalized_odds_ratio(df_test_encoded.earnings, predictions, sensitive_features=df_test_encoded.sex)

    if print_:
        print(f"equalized odds difference sex: {eod_sex:.3f}")
        print(f"equalized odds ratio sex: {eor_sex:.3f}")

    return eod_sex, eor_sex
Example #3
def __binary_group_fairness_measures(X,
                                     prtc_attr,
                                     y_true,
                                     y_pred,
                                     y_prob=None,
                                     priv_grp=1):
    """[summary]

    Args:
        X (pandas DataFrame): Sample features
        prtc_attr (named array-like): values for the protected attribute
            (note: protected attribute may also be present in X)
        y_true (pandas DataFrame): Sample targets
        y_pred (pandas DataFrame): Sample target predictions
        y_prob (pandas DataFrame, optional): Sample target probabilities. Defaults
            to None.
        priv_grp (int, optional): Value of the protected attribute that denotes the
            privileged group. Defaults to 1.

    Returns:
        tuple: ('Group Fairness', dict) where the dict maps measure names to values.
    """
    pa_names = prtc_attr.columns.tolist()
    gf_vals = {}
    gf_key = 'Group Fairness'
    gf_vals['Statistical Parity Difference'] = \
        aif_mtrc.statistical_parity_difference(y_true, y_pred, prot_attr=pa_names)
    gf_vals['Disparate Impact Ratio'] = \
        aif_mtrc.disparate_impact_ratio(y_true, y_pred, prot_attr=pa_names)
    # fairlearn-based measures are only reported for a single protected attribute
    if not helper.is_tutorial_running() and not len(pa_names) > 1:
        gf_vals['Demographic Parity Difference'] = \
            fl_mtrc.demographic_parity_difference(y_true, y_pred,
                                                  sensitive_features=prtc_attr)
        gf_vals['Demographic Parity Ratio'] = \
            fl_mtrc.demographic_parity_ratio(y_true, y_pred,
                                             sensitive_features=prtc_attr)
    gf_vals['Average Odds Difference'] = \
        aif_mtrc.average_odds_difference(y_true, y_pred, prot_attr=pa_names)
    gf_vals['Equal Opportunity Difference'] = \
        aif_mtrc.equal_opportunity_difference(y_true, y_pred, prot_attr=pa_names)
    if not helper.is_tutorial_running() and not len(pa_names) > 1:
        gf_vals['Equalized Odds Difference'] = \
            fl_mtrc.equalized_odds_difference(y_true, y_pred,
                                              sensitive_features=prtc_attr)
        gf_vals['Equalized Odds Ratio'] = \
            fl_mtrc.equalized_odds_ratio(y_true, y_pred,
                                         sensitive_features=prtc_attr)
    gf_vals['Positive Predictive Parity Difference'] = \
        aif_mtrc.difference(sk_metric.precision_score, y_true,
                            y_pred, prot_attr=pa_names, priv_group=priv_grp)
    gf_vals['Balanced Accuracy Difference'] = \
        aif_mtrc.difference(sk_metric.balanced_accuracy_score, y_true,
                            y_pred, prot_attr=pa_names, priv_group=priv_grp)
    if y_prob is not None:
        gf_vals['AUC Difference'] = \
            aif_mtrc.difference(sk_metric.roc_auc_score, y_true, y_prob,
                                prot_attr=pa_names, priv_group=priv_grp)
    return (gf_key, gf_vals)
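
The snippet above relies on module aliases and a project helper that are not shown. A plausible mapping is sketched below; it is an assumption based on the names used, and helper.is_tutorial_running() has no public equivalent:

import sklearn.metrics as sk_metric              # precision, balanced accuracy, ROC AUC
import fairlearn.metrics as fl_mtrc              # demographic parity / equalized odds helpers
from aif360.sklearn import metrics as aif_mtrc   # statistical parity, disparate impact, ...
# `helper` is specific to the original project and is not reproduced here.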
Example #4
def test_equalized_odds_difference(agg_method):
    actual = equalized_odds_difference(y_t,
                                       y_p,
                                       sensitive_features=g_1,
                                       method=agg_method)

    metrics = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    gm = MetricFrame(metrics, y_t, y_p, sensitive_features=g_1)

    diffs = gm.difference(method=agg_method)
    assert actual == diffs.max()
Example #5
def test_equalized_odds_difference_weighted(agg_method):
    actual = equalized_odds_difference(y_t,
                                       y_p,
                                       sensitive_features=g_1,
                                       method=agg_method,
                                       sample_weight=s_w)

    metrics = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    sw = {'sample_weight': s_w}
    sp = {'tpr': sw, 'fpr': sw}
    gm = MetricFrame(metrics,
                     y_t,
                     y_p,
                     sensitive_features=g_1,
                     sample_params=sp)

    diffs = gm.difference(method=agg_method)
    assert actual == diffs.max()
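
The two test functions above rely on module-level data (y_t, y_p, g_1, s_w) and a parametrized agg_method that the snippets do not show. A self-contained stand-in for that scaffolding might look like the following; the arrays and the parametrize values are assumptions, not the library's actual test data:

import numpy as np
import pytest
from fairlearn.metrics import (MetricFrame, equalized_odds_difference,
                               false_positive_rate, true_positive_rate)

# Stand-in module-level test data (illustrative values)
y_t = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1])   # ground truth
y_p = np.array([0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1])   # predictions
g_1 = np.array(list("aaabbbabbaab"))                    # sensitive feature
s_w = np.array([1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1])    # sample weights

# Each test above would be decorated like this to receive agg_method:
# @pytest.mark.parametrize("agg_method", ["between_groups", "to_overall"])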
Example #6
def evaluate_model(model, device, criterion, data_loader):
    model.eval()
    y_true = []
    y_pred = []
    y_out = []
    sensitives = []
    for i, data in enumerate(data_loader):
        x, y, sensitive_features = data
        x = x.to(device)
        y = y.to(device)
        sensitive_features = sensitive_features.to(device)
        with torch.no_grad():
            logit = model(x)
        # logit: raw scores of shape (b, 1); threshold sigmoid(logit) at 0.5 for the binary prediction
        bina = (torch.sigmoid(logit) > 0.5).float()
        y_true += y.cpu().tolist()
        y_pred += bina.cpu().tolist()
        y_out += torch.sigmoid(logit).tolist()
        sensitives += sensitive_features.cpu().tolist()
    result = {}
    result["acc"] = skm.accuracy_score(y_true, y_pred)
    result["f1score"] = skm.f1_score(y_true, y_pred)
    result["AUC"] = skm.roc_auc_score(y_true, y_out)
    # Note: use the accumulated `sensitives` list, not the last batch's tensor
    result["DP"] = {
        "diff": flm.demographic_parity_difference(
            y_true, y_pred, sensitive_features=sensitives),
        "ratio": flm.demographic_parity_ratio(
            y_true, y_pred, sensitive_features=sensitives),
    }
    result["EO"] = {
        "diff": flm.equalized_odds_difference(
            y_true, y_pred, sensitive_features=sensitives),
        "ratio": flm.equalized_odds_ratio(
            y_true, y_pred, sensitive_features=sensitives),
    }
    return result
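
A small, self-contained usage sketch for evaluate_model, assuming the function above is defined in the same session. The toy model, data, and loader below are illustrative; criterion is accepted by the function but never used inside it:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import sklearn.metrics as skm   # aliases evaluate_model expects at module level
import fairlearn.metrics as flm

# Toy setup: a linear model producing (b, 1) logits and a loader yielding (x, y, sensitive)
torch.manual_seed(0)
x = torch.randn(256, 8)
y = torch.randint(0, 2, (256, 1)).float()
s = torch.randint(0, 2, (256,))
loader = DataLoader(TensorDataset(x, y, s), batch_size=64)

model = nn.Linear(8, 1)
device = torch.device("cpu")
criterion = nn.BCEWithLogitsLoss()

result = evaluate_model(model, device, criterion, loader)
print(result["acc"], result["DP"]["diff"], result["EO"]["ratio"])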
Example #7
def test(args, model, device, test_loader, test_size, sensitive_idx):

    model.eval()
    criterion = nn.BCELoss()
    test_loss = 0
    correct = 0
    i = 0

    avg_recall = 0
    avg_precision = 0
    overall_results = []
    avg_eq_odds = 0
    avg_dem_par = 0
    avg_tpr = 0
    avg_tp = 0
    avg_tn = 0
    avg_fp = 0
    avg_fn = 0
    with torch.no_grad():
        for cats, conts, target in tqdm(test_loader):
            print("*********")
            #i += 1
            cats, conts, target = cats.to(device), conts.to(device), target.to(device)


            output = model(cats, conts)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = (output > 0.5).float()
            correct += pred.eq(target.view_as(pred)).sum().item()
            # Move to CPU once so pandas / sklearn / fairlearn below also work when device is a GPU
            target, pred = target.cpu(), pred.cpu()

            curr_datetime = datetime.now()
            curr_hour = curr_datetime.hour
            curr_min = curr_datetime.minute

            pred_df = pd.DataFrame(pred.numpy())
            pred_df.to_csv(f"pred_results/{args.run_name}_{curr_hour}-{curr_min}.csv")

            # confusion matrix
            tn, fp, fn, tp = confusion_matrix(target, pred).ravel()
            avg_tn+=tn
            avg_fp+=fp
            avg_fn+=fn
            avg_tp+=tp

            # position of col for sensitive values
            sensitive = [i[sensitive_idx].item() for i in cats]
            cat_len = max(sensitive)
            print(cat_len)
            #exit()
            sub_cm = []
            #print(cat_len)
            for j in range(cat_len+1):
                try:
                    idx = list(locate(sensitive, lambda x: x == j))
                    sub_tar = target[idx]
                    sub_pred = pred[idx]
                    sub_tn, sub_fp, sub_fn, sub_tp = confusion_matrix(sub_tar, sub_pred).ravel()
                except ValueError:
                    # confusion_matrix does not yield four values when the subgroup
                    # contains only a single (target, prediction) combination
                    temp_tar = int(sub_tar.numpy()[0])
                    temp_pred = int(sub_pred.numpy()[0])
                    #print(tar, pred)
                    if temp_tar and temp_pred:
                        sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 1
                    elif temp_tar and not temp_pred:
                        sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 1, 0
                    elif not temp_tar and not temp_pred:
                        sub_tn, sub_fp, sub_fn, sub_tp = 1, 0, 0, 0
                    elif not temp_tar and temp_pred:
                        sub_tn, sub_fp, sub_fn, sub_tp = 0, 1, 0, 0
                    else:
                        sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 0

                total = mysum(sub_tn, sub_fp, sub_fn, sub_tp)
                sub_cm.append((sub_tn/total, sub_fp/total, sub_fn/total, sub_tp/total))

            # Fairness metrics
            group_metrics = MetricFrame({'precision': skm.precision_score, 'recall': skm.recall_score},
                                        target, pred,
                                        sensitive_features=sensitive)

            demographic_parity = flm.demographic_parity_difference(target, pred,
                                                                   sensitive_features=sensitive)

            eq_odds = flm.equalized_odds_difference(target, pred,
                                                    sensitive_features=sensitive)

            # metric_fns = {'true_positive_rate': true_positive_rate}

            tpr = MetricFrame(true_positive_rate,
                              target, pred,
                              sensitive_features=sensitive)

            # tpr = flm.true_positive_rate(target, pred,sample_weight=sensitive)
            sub_results = group_metrics.overall.to_dict()
            sub_results_by_group = group_metrics.by_group.to_dict()

            #print("\n", group_metrics.by_group, "\n")
            avg_precision += sub_results['precision']
            avg_recall += sub_results['recall']
            overall_results.append(sub_results_by_group)
            avg_eq_odds += eq_odds
            avg_dem_par += demographic_parity
            avg_tpr += tpr.difference(method='between_groups')

    print(i)
    total = mysum(avg_tn, avg_fp, avg_fn, avg_tp)
    cm = (avg_tn/total, avg_fp/total, avg_fn/total, avg_tp/total)
    test_loss /= test_size
    accuracy = correct / test_size
    avg_loss = test_loss


    return accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results
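
Example #7 (including the two test_student variants that follow) leans on imports and project helpers that the snippets themselves do not show. The following is a plausible reconstruction under stated assumptions: mysum is taken to be a plain sum of its arguments, while RegressionModel and student_loader remain project-specific and are not reproduced here.

from datetime import datetime

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn.metrics as skm
import fairlearn.metrics as flm
from fairlearn.metrics import MetricFrame, true_positive_rate
from more_itertools import locate                  # locate(iterable, pred) -> matching indices
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from tqdm import tqdm


def mysum(*args):
    # Assumed to be a simple sum of its arguments (the original helper is project-specific)
    return sum(args)

# RegressionModel (the tabular embedding network) and student_loader (which pairs the student
# training batches with their labels) are defined elsewhere in the original project.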
def test_student(args, student_train_loader, student_labels,
                 student_test_loader, test_size, cat_emb_size, num_conts,
                 device, sensitive_idx):
    student_model = RandomForestClassifier(random_state=42,
                                           warm_start=True,
                                           n_estimators=100)

    print("========== Testing Student Model ==========")
    for epoch in range(args.epochs):
        train_loader = student_loader(student_train_loader, student_labels)
        for (cats, conts), labels in train_loader:
            X = torch.cat((cats, conts), 1)
            student_model = student_model.fit(X, labels)

            test_loss = 0
            correct = 0
            i = 0

            avg_recall = 0
            avg_precision = 0
            overall_results = []
            avg_eq_odds = 0
            avg_dem_par = 0
            avg_tpr = 0
            avg_tp = 0
            avg_tn = 0
            avg_fp = 0
            avg_fn = 0

            with torch.no_grad():
                for batch_idx, (cats, conts,
                                target) in enumerate(student_test_loader):
                    print("target\n", sum(target))
                    i += 1
                    X = torch.cat((cats, conts), 1)
                    output = student_model.predict(X)
                    output = torch.from_numpy(output)
                    pred = (output > 0.5).float()
                    print("pred\n", sum(pred))
                    correct += pred.eq(target.view_as(pred)).sum().item()

                    curr_datetime = datetime.now()
                    curr_hour = curr_datetime.hour
                    curr_min = curr_datetime.minute

                    pred_df = pd.DataFrame(pred.numpy())
                    pred_df.to_csv(
                        f"pred_results/{args.run_name}_{curr_hour}-{curr_min}.csv"
                    )

                    #print(pred, np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy()))
                    #correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
                    #total += cats.size(0)

                    # confusion matrix
                    tn, fp, fn, tp = confusion_matrix(target, pred).ravel()
                    avg_tn += tn
                    avg_fp += fp
                    avg_fn += fn
                    avg_tp += tp

                    # position of col for sensitive values
                    sensitive = [i[sensitive_idx].item() for i in cats]
                    cat_len = max(sensitive)

                    #exit()
                    sub_cm = []
                    # print(cat_len)
                    for j in range(cat_len + 1):
                        try:
                            idx = list(locate(sensitive, lambda x: x == j))
                            sub_tar = target[idx]
                            sub_pred = pred[idx]
                            sub_tn, sub_fp, sub_fn, sub_tp = confusion_matrix(
                                sub_tar, sub_pred).ravel()
                        except ValueError:
                            # confusion_matrix does not yield four values when the subgroup
                            # contains only a single (target, prediction) combination
                            temp_tar = int(sub_tar.numpy()[0])
                            temp_pred = int(sub_pred.numpy()[0])
                            # print(tar, pred)
                            if temp_tar and temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 1
                            elif temp_tar and not temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 1, 0
                            elif not temp_tar and not temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 1, 0, 0, 0
                            elif not temp_tar and temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 1, 0, 0
                            else:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 0

                        total = mysum(sub_tn, sub_fp, sub_fn, sub_tp)
                        print("??", total)
                        sub_cm.append((sub_tn / total, sub_fp / total,
                                       sub_fn / total, sub_tp / total))

                    # Fairness metrics

                    group_metrics = MetricFrame(
                        {
                            'precision': skm.precision_score,
                            'recall': skm.recall_score
                        },
                        target,
                        pred,
                        sensitive_features=sensitive)

                    print(target)
                    print(pred)
                    demographic_parity = flm.demographic_parity_difference(
                        target, pred, sensitive_features=sensitive)

                    eq_odds = flm.equalized_odds_difference(
                        target, pred, sensitive_features=sensitive)

                    # metric_fns = {'true_positive_rate': true_positive_rate}

                    tpr = MetricFrame(true_positive_rate,
                                      target,
                                      pred,
                                      sensitive_features=sensitive)

                    # tpr = flm.true_positive_rate(target, pred,sample_weight=sensitive)
                    sub_results = group_metrics.overall.to_dict()
                    sub_results_by_group = group_metrics.by_group.to_dict()

                    # print("\n", group_metrics.by_group, "\n")
                    avg_precision += sub_results['precision']
                    avg_recall += sub_results['recall']
                    print("pre_rec", sub_results)
                    overall_results.append(sub_results_by_group)
                    avg_eq_odds += eq_odds
                    print("eqo", eq_odds)
                    avg_dem_par += demographic_parity
                    print("dempar", demographic_parity)
                    avg_tpr += tpr.difference(method='between_groups')
                    print("tpr", tpr.difference(method='between_groups'))

            total = mysum(avg_tn, avg_fp, avg_fn, avg_tp)
            print("!!", total)
            cm = (avg_tn / total, avg_fp / total, avg_fn / total,
                  avg_tp / total)
            test_loss /= test_size
            accuracy = correct / test_size
            avg_loss = test_loss

            return accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results
# Alternative variant: a neural-network student instead of the random forest above
def test_student(args, student_train_loader, student_labels, student_test_loader, test_size, cat_emb_size, num_conts, device, sensitive_idx):
    student_model = RegressionModel(emb_szs=cat_emb_size,
                    n_cont=num_conts,
                    emb_drop=0.04,
                    out_sz=1,
                    szs=[1000, 500, 250],
                    drops=[0.001, 0.01, 0.01],
                    y_range=(0, 1)).to(device)

    criterion = nn.BCELoss()
    optimizer = optim.SGD(student_model.parameters(), lr=args.lr, momentum=0)
    steps = 0
    running_loss = 0
    correct = 0
    print("========== Testing Student Model ==========")
    for epoch in range(args.epochs):
        student_model.train()
        train_loader = student_loader(student_train_loader, student_labels)
        for (cats, conts) , labels in train_loader:
        #for _batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            #cats = data[0]
            #conts = data[1]
            steps += 1

            optimizer.zero_grad()
            output = student_model(cats, conts).view(-1)
            labels = labels.to(torch.float32)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        #            if steps % 50 == 0:
            student_model.eval()
            test_loss = 0
            correct = 0
            i = 0

            avg_recall = 0
            avg_precision = 0
            overall_results = []
            avg_eq_odds = 0
            avg_dem_par = 0
            avg_tpr = 0
            avg_tp = 0
            avg_tn = 0
            avg_fp = 0
            avg_fn = 0

            with torch.no_grad():
                for batch_idx, (cats, conts, target) in enumerate(student_test_loader):
                    print("target\n", sum(target))
                    i+=1
                    output = student_model(cats, conts)
                    # Running mean of the batch test loss (do not reuse the training `loss` tensor)
                    batch_loss = criterion(output, target.view_as(output).to(torch.float32))
                    test_loss = test_loss + ((1 / (batch_idx + 1)) * (batch_loss.item() - test_loss))
                    pred = (output > 0.5).float()
                    print("pred\n", sum(pred))
                    correct += pred.eq(target.view_as(pred)).sum().item()

                    curr_datetime = datetime.now()
                    curr_hour = curr_datetime.hour
                    curr_min = curr_datetime.minute

                    pred_df = pd.DataFrame(pred.numpy())
                    pred_df.to_csv(f"pred_results/{args.run_name}_{curr_hour}-{curr_min}.csv")

                    #print(pred, np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy()))
                    #correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
                    #total += cats.size(0)


                    # confusion matrix
                    tn, fp, fn, tp = confusion_matrix(target, pred).ravel()
                    avg_tn += tn
                    avg_fp += fp
                    avg_fn += fn
                    avg_tp += tp

                    # position of col for sensitive values
                    sensitive = [i[sensitive_idx].item() for i in cats]
                    cat_len = max(sensitive)

                    #exit()
                    sub_cm = []
                    # print(cat_len)
                    for j in range(cat_len+1):
                        try:
                            idx = list(locate(sensitive, lambda x: x == j))
                            sub_tar = target[idx]
                            sub_pred = pred[idx]
                            sub_tn, sub_fp, sub_fn, sub_tp = confusion_matrix(sub_tar, sub_pred).ravel()
                        except ValueError:
                            # confusion_matrix does not yield four values when the subgroup
                            # contains only a single (target, prediction) combination
                            print("----WHAT?")
                            temp_tar = int(sub_tar.numpy()[0])
                            temp_pred = int(sub_pred.numpy()[0])
                            # print(tar, pred)
                            if temp_tar and temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 1
                            elif temp_tar and not temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 1, 0
                            elif not temp_tar and not temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 1, 0, 0, 0
                            elif not temp_tar and temp_pred:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 1, 0, 0
                            else:
                                sub_tn, sub_fp, sub_fn, sub_tp = 0, 0, 0, 0

                        total = mysum(sub_tn, sub_fp, sub_fn, sub_tp)
                        print("??", total)
                        sub_cm.append((sub_tn / total, sub_fp / total, sub_fn / total, sub_tp / total))

                    # Fairness metrics

                    group_metrics = MetricFrame({'precision': skm.precision_score, 'recall': skm.recall_score},
                                                target, pred,
                                                sensitive_features=sensitive)


                    demographic_parity = flm.demographic_parity_difference(target, pred,
                                                                           sensitive_features=sensitive)

                    eq_odds = flm.equalized_odds_difference(target, pred,
                                                            sensitive_features=sensitive)

                    # metric_fns = {'true_positive_rate': true_positive_rate}

                    tpr = MetricFrame(true_positive_rate,
                                      target, pred,
                                      sensitive_features=sensitive)

                    # tpr = flm.true_positive_rate(target, pred,sample_weight=sensitive)
                    sub_results = group_metrics.overall.to_dict()
                    sub_results_by_group = group_metrics.by_group.to_dict()

                    # print("\n", group_metrics.by_group, "\n")
                    avg_precision += sub_results['precision']
                    avg_recall += sub_results['recall']
                    print("pre_rec", sub_results)
                    overall_results.append(sub_results_by_group)
                    avg_eq_odds += eq_odds
                    print("eqo", eq_odds)
                    avg_dem_par += demographic_parity
                    print("dempar", demographic_parity)
                    avg_tpr += tpr.difference(method='between_groups')
                    print("tpr", tpr.difference(method='between_groups'))

            total = mysum(avg_tn, avg_fp, avg_fn, avg_tp)
            print("!!", total)
            cm = (avg_tn / total, avg_fp / total, avg_fn / total, avg_tp / total)
            test_loss /= test_size
            accuracy = correct / test_size
            avg_loss = test_loss

            return accuracy, avg_loss, avg_precision, avg_recall, avg_eq_odds, avg_tpr, avg_dem_par, cm, sub_cm, overall_results
Example #10
def fair_metrics(gt, y, group):
    metrics_dict = {
        "DPd": demographic_parity_difference(gt, y, sensitive_features=group),
        "EOd": equalized_odds_difference(gt, y, sensitive_features=group),
    }
    return metrics_dict
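
A quick usage sketch for the dictionary-based fair_metrics helper in Example #10; the arrays below are illustrative only:

import numpy as np
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

# Illustrative ground truth, predictions, and group membership
gt = np.array([1, 0, 1, 1, 0, 1, 0, 0])
y = np.array([1, 0, 0, 1, 1, 1, 0, 0])
group = np.array(["f", "f", "m", "m", "f", "m", "m", "f"])

print(fair_metrics(gt, y, group))   # {'DPd': ..., 'EOd': ...}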