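# The snippets below assume roughly the following imports. The repo-local helpers
# (load_model, train_model, Critic, Data, get_best_thresh, get_valid_objective,
# get_test_objective, compute_bias) and the module-level `logger` and `device`
# are defined elsewhere in the repository and are not shown here.
import copy
import json
import math
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from aif360.algorithms.postprocessing import (CalibratedEqOddsPostprocessing,
                                              EqOddsPostprocessing,
                                              RejectOptionClassification)
from botorch.acquisition import ConstrainedExpectedImprovement
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from skopt import gbrt_minimize
from skopt.space import Real

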
def fairBO_debiasing(model_state_dict, data, config, device):
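    """Fair Bayesian optimization (fairBO) debiasing.

    Runs constrained Bayesian optimization over fine-tuning hyperparameters
    (learning rate, Adam betas, weight decay, warm-restart period). After an
    initial random design, a GP is fit to the (performance, bias) outcomes and
    a constrained expected-improvement acquisition proposes new candidates with
    the bias outcome restricted to (-0.05, 0.05). The best fine-tuned model
    (by validation objective) is evaluated on the validation and test splits.
    """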
    def evaluate(lr, beta1, beta2, alpha, T0, verbose=False):
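        """Fine-tune a copy of the base model on the validation split with the
        given optimizer hyperparameters, then pick the decision threshold that
        maximizes the constrained objective. Returns (objective dict, model,
        threshold)."""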
        model = load_model(data.num_features,
                           config.get('hyperparameters', {}))
        model.load_state_dict(model_state_dict)
        model.to(device)

        loss_fn = torch.nn.BCELoss()
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               betas=(beta1, beta2),
                               weight_decay=alpha)
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, int(T0))

        for epoch in range(201):
            model.train()
            batch_idxs = torch.split(torch.randperm(data.X_valid.size(0)), 64)
            train_loss = 0
            for i, batch in enumerate(batch_idxs):
                X = data.X_valid_gpu[batch, :]
                y = data.y_valid_gpu[batch]

                optimizer.zero_grad()
                loss = loss_fn(model(X)[:, 0], y)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
                # Advance the warm-restart scheduler by a fractional epoch per
                # batch (the documented per-batch usage), not by the batch size.
                scheduler.step(epoch + i / len(batch_idxs))
            if epoch % 10 == 0 and verbose:
                model.eval()
                with torch.no_grad():
                    valid_loss = loss_fn(
                        model(data.X_valid_valid.to(device))[:, 0],
                        data.y_valid_valid.to(device))
                print(
                    f'=======> Epoch: {epoch} Train loss: {train_loss / len(batch_idxs)} '
                    f'Valid loss: {valid_loss}')

        model.eval()
        with torch.no_grad():
            scores = model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()

        best_thresh, _ = get_best_thresh(scores,
                                         np.linspace(0, 1, 1001),
                                         data,
                                         config,
                                         valid=False,
                                         margin=config['fairBO']['margin'])
        return get_valid_objective(scores > best_thresh,
                                   data,
                                   config,
                                   valid=False), model, best_thresh

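    # Build a discrete search grid and box bounds for each hyperparameter from
    # the config; log-scaled variables are specified as base-10 exponents and
    # mapped to their actual values (10**x) here.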
    space = config['fairBO']['hyperparameters']
    search_space = {}
    bounds_dict = {}
    for var in space:
        search_space[var] = np.arange(space[var]['start'], space[var]['end'],
                                      space[var]['step'])
        bounds_dict[var] = torch.tensor(
            [space[var]['start'], space[var]['end']])
        if space[var]['log_scale']:
            search_space[var] = np.exp(np.log(10) * search_space[var])
            bounds_dict[var] = torch.exp(float(np.log(10)) * bounds_dict[var])

    def sample_space():
        return {
            var: np.random.choice(rng)
            for var, rng in search_space.items()
        }

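    # Initial design: evaluate a random sample of hyperparameter settings to
    # seed the GP surrogate, tracking the best model seen so far.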
    X_hyp = []
    y_hyp = []
    best_model = [None, -math.inf, -1]
    for it in range(config['fairBO']['initial_budget']):
        X_hyp.append(sample_space())
        logger.info(
            f'(Iteration {it}) Evaluating fairBO with sample {X_hyp[-1]}')
        y_eval, model_candidate, thresh = evaluate(**X_hyp[-1])
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y_hyp.append(y_eval)

    X_df = pd.DataFrame(X_hyp)
    X = torch.tensor(X_df.to_numpy())
    y = torch.tensor(pd.DataFrame(y_hyp)[['performance', 'bias']].to_numpy())

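    # BO loop: refit the GP on all evaluated points, maximize constrained EI
    # (bias restricted to (-0.05, 0.05)), evaluate the proposed candidate, and
    # append the new observation.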
    for it in range(config['fairBO']['total_budget'] -
                    config['fairBO']['initial_budget']):
        xscaler = StandardScaler()
        gp = SingleTaskGP(torch.tensor(xscaler.fit_transform(X)), y)
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

        cEI = ConstrainedExpectedImprovement(gp, y[:, 0].max().item(), 0,
                                             {1: (-0.05, 0.05)})
        bounds = torch.stack([bounds_dict[x] for x in X_df.columns])
        # The GP was fit on standardized inputs, so optimize the acquisition over
        # the standardized bounds and map the winning candidate back afterwards.
        scaled_bounds = torch.tensor(xscaler.transform(bounds.T.numpy()))
        candidate, _ = optimize_acqf(cEI, scaled_bounds, 1, 100, 1024)
        inv_candidate = xscaler.inverse_transform(candidate)

        hyp = {k: v.item() for k, v in zip(X_df.columns, inv_candidate[0])}
        logger.info(
            f'(Iteration {it+config["fairBO"]["initial_budget"]}) Evaluating fairBO with sample {hyp}'
        )

        # Keep X in the original (unstandardized) space, matching the initial design.
        X = torch.cat((X, torch.tensor(inv_candidate)))

        y_eval, model_candidate, thresh = evaluate(**hyp)
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y = torch.cat(
            (y, torch.tensor([[y_eval['performance'], y_eval['bias']]])))

    logger.info('Evaluating best fairBO debiased model.')
    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_valid_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_test_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
def random_debiasing(model_state_dict, data, config, device, verbose=True):
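    """Random-perturbation debiasing: repeatedly multiply the trained weights by
    (1 + Gaussian noise), re-threshold the scores on the validation split, and
    keep the perturbed weights and threshold with the best constrained objective."""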
    logger.info('Generating Random Debiased models.')
    rand_model = load_model(data.num_features,
                            config.get('hyperparameters', {}))
    rand_model.to(device)
    rand_result = {
        'objective': -math.inf,
        'model': rand_model.state_dict(),
        'thresh': -1
    }
    for iteration in range(config['random']['num_trials']):
        rand_model.load_state_dict(model_state_dict)
        for param in rand_model.parameters():
            param.data = param.data * (
                torch.randn_like(param) * config['random']['stddev'] + 1)

        rand_model.eval()
        with torch.no_grad():
            scores = rand_model(data.X_valid_gpu)[:,
                                                  0].reshape(-1).cpu().numpy()

        threshs = np.linspace(0, 1, 501)
        best_rand_thresh, best_obj = get_best_thresh(
            scores,
            threshs,
            data,
            config,
            valid=False,
            margin=config['random']['margin'])
        if best_obj > rand_result['objective']:
            rand_result = {
                'objective': best_obj,
                'model': copy.deepcopy(rand_model.state_dict()),
                'thresh': best_rand_thresh
            }
            rand_model.eval()
            with torch.no_grad():
                y_pred = (rand_model(data.X_test_gpu)[:, 0] >
                          best_rand_thresh).reshape(-1).cpu().numpy()
            best_test_result = get_test_objective(y_pred, data,
                                                  config)['objective']

        if iteration % 10 == 0 and verbose:
            logger.info(
                f'{iteration} / {config["random"]["num_trials"]} trials have been sampled.'
            )
            logger.info(f'Best result so far = {rand_result["objective"]}')
            logger.info(f'Best test result so far = {best_test_result}')

    logger.info('Evaluating best random debiased model.')
    rand_model.load_state_dict(rand_result['model'])
    rand_model.eval()
    with torch.no_grad():
        y_pred = (rand_model(data.X_valid_gpu)[:, 0] >
                  rand_result['thresh']).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    rand_model.eval()
    with torch.no_grad():
        y_pred = (rand_model(data.X_test_gpu)[:, 0] >
                  rand_result['thresh']).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)

    return results_valid, results_test
def adversarial_debiasing(model_state_dict, data, config, device):
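    """Adversarial fine-tuning: a critic learns to predict the batch bias metric
    from the actor's intermediate representation, and the actor's BCE loss is
    scaled up whenever the critic's predicted bias approaches or exceeds the
    epsilon budget (within the configured margin)."""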
    logger.info('Training Adversarial model.')
    actor = load_model(data.num_features, config.get('hyperparameters', {}))
    actor.load_state_dict(model_state_dict)
    actor.to(device)
    hid = config.get('hyperparameters', {}).get('hid', 32)
    critic = Critic(hid * config['adversarial']['batch_size'],
                    num_deep=config['adversarial']['num_deep'],
                    hid=hid)
    critic.to(device)
    critic_optimizer = optim.Adam(critic.parameters())
    critic_loss_fn = torch.nn.MSELoss()

    actor_optimizer = optim.Adam(actor.parameters(),
                                 lr=config['adversarial']['lr'])
    actor_loss_fn = torch.nn.BCELoss()

    for epoch in range(config['adversarial']['epochs']):
        for param in critic.parameters():
            param.requires_grad = True
        for param in actor.parameters():
            param.requires_grad = False
        actor.eval()
        critic.train()
        for step in range(config['adversarial']['critic_steps']):
            critic_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['adversarial']['batch_size'], ))
            cX_valid = data.X_valid_gpu[indices]
            cy_valid = data.y_valid[indices]
            cp_valid = data.p_valid[indices]
            with torch.no_grad():
                scores = actor(cX_valid)[:, 0].reshape(-1).cpu().numpy()

            bias = compute_bias(scores, cy_valid.numpy(), cp_valid,
                                config['metric'])

            res = critic(actor.trunc_forward(cX_valid))
            # MSE between the critic's predicted bias and the measured batch bias.
            loss = critic_loss_fn(res[0], torch.tensor([bias], device=device))
            loss.backward()
            train_loss = loss.item()
            critic_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Critic Epoch: {(epoch, step)} loss: {train_loss}'
                )

        for param in critic.parameters():
            param.requires_grad = False
        for param in actor.parameters():
            param.requires_grad = True
        actor.train()
        critic.eval()
        for step in range(config['adversarial']['actor_steps']):
            actor_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['adversarial']['batch_size'], ))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]

            pred_bias = critic(actor.trunc_forward(cX_valid))
            bceloss = actor_loss_fn(actor(cX_valid)[:, 0], cy_valid)

            # Scale the BCE loss by a hinge on the critic's predicted bias: the
            # factor is 1 while |pred_bias| stays below epsilon - margin and grows
            # linearly (slope lambda) with the violation beyond that point.
            objloss = max(
                1, config['adversarial']['lambda'] *
                (abs(pred_bias[0][0]) - config['objective']['epsilon'] +
                 config['adversarial']['margin']) + 1) * bceloss

            objloss.backward()
            train_loss = objloss.item()
            actor_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Actor Epoch: {(epoch, step)} loss: {train_loss}'
                )

        if epoch % 10 == 0:
            with torch.no_grad():
                scores = actor(data.X_valid_gpu)[:,
                                                 0].reshape(-1,
                                                            1).cpu().numpy()
                _, best_adv_obj = get_best_thresh(
                    scores,
                    np.linspace(0, 1, 1001),
                    data,
                    config,
                    valid=False,
                    margin=config['adversarial']['margin'])
                logger.info(f'Objective: {best_adv_obj}')

    logger.info('Finding optimal threshold for Adversarial model.')
    with torch.no_grad():
        scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()

    best_adv_thresh, _ = get_best_thresh(
        scores,
        np.linspace(0, 1, 1001),
        data,
        config,
        valid=False,
        margin=config['adversarial']['margin'])

    logger.info('Evaluating Adversarial model on best threshold.')
    with torch.no_grad():
        labels = (actor(data.X_valid_gpu)[:, 0] > best_adv_thresh).reshape(
            -1, 1).cpu().numpy()
    results_valid = get_valid_objective(labels, data, config)
    logger.info(f'Results: {results_valid}')

    with torch.no_grad():
        labels = (actor(data.X_test_gpu)[:, 0] > best_adv_thresh).reshape(
            -1, 1).cpu().numpy()
    results_test = get_test_objective(labels, data, config)

    return results_valid, results_test
def layerwiseOpt_debiasing(model_state_dict, data, config, device):
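    """Layer-wise black-box optimization: for each of the last `num_layers`
    parameter tensors, pick a sparse random subset of entries and optimize them
    with a gradient-boosted-tree surrogate (skopt's gbrt_minimize) to maximize
    the constrained objective (by minimizing its negative) on the validation
    split; keep the best resulting state dict and threshold."""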
    logger.info('Training layerwiseOpt model.')
    base_model = load_model(data.num_features,
                            config.get('hyperparameters', {}))
    base_model.load_state_dict(model_state_dict)
    base_model.to(device)
    best_state_dict, best_obj, best_thresh = None, math.inf, -1

    total_params = len(list(base_model.parameters()))
    for index, param in enumerate(base_model.parameters()):
        if index < total_params - config['layerwiseOpt']['num_layers']:
            continue
        logger.info(f'Evaluating param number {index} of {total_params}')
        param_copy = copy.deepcopy(param)

        def objective(new_param, return_thresh=False):
            param.data[indices] = torch.tensor(new_param,
                                               dtype=param.dtype,
                                               device=param.device)
            base_model.eval()
            with torch.no_grad():
                scores = base_model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()
            best_thresh, best_obj = get_best_thresh(
                scores,
                np.linspace(0, 1, 501),
                data,
                config,
                valid=False,
                margin=config['layerwiseOpt']['margin'])
            print(f'Evaluating param number {index} of {total_params}')
            if return_thresh:
                return -float(best_obj), float(best_thresh)
            return -float(best_obj)

        mean = param.flatten().cpu().detach().numpy().mean()
        std = param.flatten().cpu().detach().numpy().std()
        num_elems = param.size().numel()
        ratio = min(1., config['layerwiseOpt']['max_sparsity'] / num_elems)
        indices = torch.rand(param.size(), device=param.device) < ratio
        space = [
            Real(
                float(x.cpu().detach()) - 2.2 * std,
                float(x.cpu().detach()) + 2.2 * std) for x in param[indices]
        ]

        logger.info(f'Number of sparse indices: {indices.sum().item()}')
        res_gbrt = gbrt_minimize(objective,
                                 space,
                                 n_calls=config['layerwiseOpt']['n_calls'],
                                 verbose=True)

        if res_gbrt.fun < best_obj:
            param.data[indices] = torch.tensor(res_gbrt.x,
                                               dtype=param.dtype,
                                               device=param.device)
            # Re-evaluate to recover the matching threshold; keep best_obj in the
            # negated (minimization) space so it stays comparable to res_gbrt.fun.
            best_obj, best_thresh = objective(res_gbrt.x, return_thresh=True)
            # Snapshot a detached copy so later in-place edits to other parameters
            # cannot corrupt the saved weights.
            best_state_dict = copy.deepcopy(base_model.state_dict())
        param.data = param_copy.data

    best_model = load_model(data.num_features,
                            config.get('hyperparameters', {}))
    best_model.to(device)
    best_model.load_state_dict(best_state_dict)
    best_model.eval()
    with torch.no_grad():
        y_pred = (best_model(data.X_valid_gpu)[:, 0] >
                  best_thresh).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)

    best_model.eval()
    with torch.no_grad():
        y_pred = (best_model(data.X_test_gpu)[:, 0] >
                  best_thresh).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)

    return results_valid, results_test
def mitigating_debiasing(model_state_dict, data, config, device):
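    """Adversarial debiasing in the style of 'Mitigating Unwanted Biases with
    Adversarial Learning' (Zhang et al.): a critic predicts the protected
    attribute from the actor's intermediate representation, and the actor's
    gradients are modified to remove their component along the critic's
    gradients, minus an additional alpha-weighted adversary term."""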
    logger.info('Training Mitigating model.')
    actor = load_model(data.num_features, config.get('hyperparameters', {}))
    actor.load_state_dict(model_state_dict)
    actor.to(device)
    critic = nn.Sequential(nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 2), nn.Softmax(dim=1))
    critic.to(device)
    critic_optimizer = optim.Adam(critic.parameters())
    critic_loss_fn = torch.nn.BCELoss()

    actor_optimizer = optim.Adam(actor.parameters(),
                                 lr=config['mitigating']['lr'])
    actor_loss_fn = torch.nn.BCELoss()

    for epoch in range(config['mitigating']['epochs']):
        for param in critic.parameters():
            param.requires_grad = True
        for param in actor.parameters():
            param.requires_grad = False
        actor.eval()
        critic.train()
        for step in range(config['mitigating']['critic_steps']):
            critic_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['mitigating']['batch_size'], ))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]
            cp_valid = data.p_valid_gpu[indices]
            with torch.no_grad():
                scores = actor(cX_valid)[:, 0].reshape(-1).cpu().numpy()

            res = critic(actor.trunc_forward(cX_valid))
            loss = critic_loss_fn(res[:, 0], cp_valid.type(torch.float32))
            loss.backward()
            train_loss = loss.item()
            critic_optimizer.step()
            if (epoch % 5 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Critic Epoch: {(epoch, step)} loss: {train_loss}'
                )

        for param in critic.parameters():
            param.requires_grad = False
        for param in actor.parameters():
            param.requires_grad = True
        actor.train()
        critic.eval()
        for step in range(config['mitigating']['actor_steps']):
            actor_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['mitigating']['batch_size'], ))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]
            cp_valid = data.p_valid_gpu[indices]

            cx_predict = actor(cX_valid)
            loss_pred = actor_loss_fn(cx_predict[:, 0], cy_valid)

            cp_predict = critic(actor.trunc_forward(cX_valid))
            loss_adv = critic_loss_fn(cp_predict[:, 0],
                                      cp_valid.type(torch.float32))

            for param in actor.parameters():
                try:
                    lp = torch.autograd.grad(loss_pred,
                                             param,
                                             retain_graph=True)[0]
                    la = torch.autograd.grad(loss_adv,
                                             param,
                                             retain_graph=True)[0]
                except RuntimeError:
                    continue
                shape = la.shape
                lp = lp.flatten()
                la = la.flatten()
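                # Remove the component of the prediction gradient along the
                # adversary gradient, then subtract an alpha-weighted adversary
                # term. Note: (lp @ la) * la omits the 1/||la||^2 factor of a
                # standard vector projection.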
                lp_proj = (lp.T @ la) * la
                grad = lp - lp_proj - config['mitigating']['alpha'] * la
                grad = grad.reshape(shape)
                param.backward(grad)

            actor_optimizer.step()
            if (epoch % 5 == 0) and (step % 100 == 0):
                logger.info(f'=======> Actor Epoch: {(epoch, step)}')

        if epoch % 5 == 0:
            with torch.no_grad():
                scores = actor(data.X_valid_gpu)[:,
                                                 0].reshape(-1,
                                                            1).cpu().numpy()
                _, best_mit_obj = get_best_thresh(
                    scores,
                    np.linspace(0, 1, 1001),
                    data,
                    config,
                    valid=False,
                    margin=config['mitigating']['margin'])
                logger.info(f'Objective: {best_mit_obj}')

    logger.info('Finding optimal threshold for Mitigating model.')
    with torch.no_grad():
        scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()

    best_mit_thresh, _ = get_best_thresh(scores,
                                         np.linspace(0, 1, 1001),
                                         data,
                                         config,
                                         valid=False,
                                         margin=config['mitigating']['margin'])

    logger.info('Evaluating Mitigating model on best threshold.')
    with torch.no_grad():
        labels = (actor(data.X_valid_gpu)[:, 0] > best_mit_thresh).reshape(
            -1, 1).cpu().numpy()
    results_valid = get_valid_objective(labels, data, config)
    logger.info(f'Results: {results_valid}')

    with torch.no_grad():
        labels = (actor(data.X_test_gpu)[:, 0] > best_mit_thresh).reshape(
            -1, 1).cpu().numpy()
    results_test = get_test_objective(labels, data, config)

    return results_valid, results_test
def main(config):
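    """Load data and a pre-trained model (training one if absent), then run every
    debiasing method listed in config['models'] (default thresholding, ROC,
    EqOdds, CalibEqOdds, random, fairBO, layerwiseOpt, adversarial, mitigating)
    and write the validation/test results to JSON files under results/."""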

    seed = np.random.randint(0, high=10000)
    if 'seed' in config:
        seed = config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Setup directories to save models and results
    Path('models').mkdir(exist_ok=True)
    Path('results').mkdir(exist_ok=True)

    # Get Data
    logger.info(f'Loading Data from dataset: {config["dataset"]}.')
    data = Data(config, seed)

    # Get trained model
    model = load_model(data.num_features, config.get('hyperparameters', {}))
    model_path = (Path('models') / Path(config['modelpath']))
    if model_path.is_file():
        logger.info(f'Loading Model from {model_path}.')
        model.load_state_dict(torch.load(model_path))
    else:
        logger.info(
            f'{model_path} does not exist. Retraining model from scratch.')
        train_model(model, data, epochs=config.get('epochs', 1001))
        torch.save(model.state_dict(), model_path)
    model_state_dict = copy.deepcopy(model.state_dict())

    # Preliminaries
    logger.info('Setting up preliminaries.')
    model.eval()
    with torch.no_grad():

        valid_pred = data.valid.copy(deepcopy=True)
        valid_pred.scores = model(data.X_valid)[:, 0].reshape(-1, 1).numpy()
        valid_pred.labels = np.array(valid_pred.scores > 0.5)

        test_pred = data.test.copy(deepcopy=True)
        test_pred.scores = model(data.X_test)[:, 0].reshape(-1, 1).numpy()
        test_pred.labels = np.array(test_pred.scores > 0.5)

    results_valid = {}
    results_test = {}

    # Evaluate default model
    if 'default' in config['models']:
        logger.info(
            'Finding best threshold for default model to minimize objective function'
        )
        threshs = np.linspace(0, 1, 1001)
        performances = []
        for thresh in threshs:
            perf = balanced_accuracy_score(data.y_valid,
                                           valid_pred.scores > thresh)
            performances.append(perf)
        best_thresh = threshs[np.argmax(performances)]

        logger.info('Evaluating default model with best threshold.')
        results_valid['default'] = get_valid_objective(
            valid_pred.scores > best_thresh, data, config)
        logger.info(f'Results: {results_valid["default"]}')

        results_test['default'] = get_test_objective(
            test_pred.scores > best_thresh, data, config)

    # Evaluate ROC
    if 'ROC' in config['models']:
        metric_map = {
            'spd': 'Statistical parity difference',
            'aod': 'Average odds difference',
            'eod': 'Equal opportunity difference'
        }
        ROC = RejectOptionClassification(
            unprivileged_groups=data.unpriv,
            privileged_groups=data.priv,
            low_class_thresh=0.01,
            high_class_thresh=0.99,
            num_class_thresh=100,
            num_ROC_margin=50,
            metric_name=metric_map[config['metric']],
            metric_ub=0.05,
            metric_lb=-0.05)

        logger.info('Training ROC model with validation dataset.')
        ROC = ROC.fit(data.valid, valid_pred)

        logger.info('Evaluating ROC model.')
        y_pred = ROC.predict(valid_pred).labels.reshape(-1)
        results_valid['ROC'] = get_valid_objective(y_pred, data, config)
        logger.info(f'Results: {results_valid["ROC"]}')

        y_pred = ROC.predict(test_pred).labels.reshape(-1)
        results_test['ROC'] = get_test_objective(y_pred, data, config)
        ROC = None

    # Evaluate Equality of Odds
    if 'EqOdds' in config['models']:
        eqodds = EqOddsPostprocessing(privileged_groups=data.priv,
                                      unprivileged_groups=data.unpriv)

        logger.info('Training Equality of Odds model with validation dataset.')
        eqodds = eqodds.fit(data.valid, valid_pred)

        logger.info('Evaluating Equality of Odds model.')
        y_pred = eqodds.predict(valid_pred).labels.reshape(-1)
        results_valid['EqOdds'] = get_valid_objective(y_pred, data, config)
        logger.info(f'Results: {results_valid["EqOdds"]}')

        y_pred = eqodds.predict(test_pred).labels.reshape(-1)
        results_test['EqOdds'] = get_test_objective(y_pred, data, config)
        eqodds = None

    # Evaluate Calibrated Equality of Odds
    if 'CalibEqOdds' in config['models']:
        cost_constraint = config['CalibEqOdds']['cost_constraint']

        cpp = CalibratedEqOddsPostprocessing(privileged_groups=data.priv,
                                             unprivileged_groups=data.unpriv,
                                             cost_constraint=cost_constraint)

        logger.info(
            'Training Calibrated Equality of Odds model with validation dataset.'
        )
        cpp = cpp.fit(data.valid, valid_pred)

        logger.info('Evaluating Calibrated Equality of Odds model.')
        y_pred = cpp.predict(valid_pred).labels.reshape(-1)
        results_valid['CalibEqOdds'] = get_valid_objective(
            y_pred, data, config)
        logger.info(f'Results: {results_valid["CalibEqOdds"]}')

        y_pred = cpp.predict(test_pred).labels.reshape(-1)
        results_test['CalibEqOdds'] = get_test_objective(y_pred, data, config)

        cpp = None

    # Evaluate Random Debiasing
    if 'random' in config['models']:
        from algorithms.random import random_debiasing
        results_valid['random'], results_test['random'] = random_debiasing(
            model_state_dict, data, config, device)

    # Evaluate fairBO
    if 'fairBO' in config['models']:
        from algorithms.fairBO import fairBO_debiasing
        results_valid['fairBO'], results_test['fairBO'] = fairBO_debiasing(
            model_state_dict, data, config, device)

    # Evaluate Layerwise Optimizer
    if 'layerwiseOpt' in config['models']:
        from algorithms.layerwiseOpt import layerwiseOpt_debiasing
        results_valid['layerwiseOpt'], results_test[
            'layerwiseOpt'] = layerwiseOpt_debiasing(model_state_dict, data,
                                                     config, device)

    # Evaluate Adversarial
    if 'adversarial' in config['models']:
        from algorithms.adversarial import adversarial_debiasing
        results_valid['adversarial'], results_test[
            'adversarial'] = adversarial_debiasing(model_state_dict, data,
                                                   config, device)

    # Mitigating Unwanted Biases with Adversarial Learning
    if 'mitigating' in config['models']:
        from algorithms.mitigating import mitigating_debiasing
        results_valid['mitigating'], results_test[
            'mitigating'] = mitigating_debiasing(model_state_dict, data,
                                                 config, device)

    # Save Results
    results_valid['config'] = config
    logger.info(f'Validation Results: {results_valid}')
    logger.info(
        f'Saving validation results to {config["experiment_name"]}_valid_output.json'
    )
    with open(
            Path('results') / f'{config["experiment_name"]}_valid_output.json',
            'w') as fh:
        json.dump(results_valid, fh)

    results_test['config'] = config
    logger.info(f'Test Results: {results_test}')
    logger.info(
        f'Saving validation results to {config["experiment_name"]}_test_output.json'
    )
    with open(
            Path('results') / f'{config["experiment_name"]}_test_output.json',
            'w') as fh:
        json.dump(results_test, fh)
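

# A minimal sketch of how `main` might be invoked. The YAML config format, the
# argparse interface, and the key names mentioned here are assumptions for
# illustration; the repository's actual entry point may differ.
if __name__ == '__main__':
    import argparse

    import yaml

    parser = argparse.ArgumentParser(
        description='Run post-hoc/intra-processing debiasing experiments.')
    parser.add_argument('config',
                        help='Path to a YAML config (dataset, modelpath, metric, '
                             'models, experiment_name, per-method settings).')
    args = parser.parse_args()

    with open(args.config) as fh:
        config = yaml.safe_load(fh)
    main(config)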