# Example no. 1 (score: 0)
def train(verbose=True):
    """Train a fair binary classifier on the 'adult' dataset.

    Optimises cross-entropy plus a demographic-parity fairness penalty
    (``config.fairness_reg * uncond_fair_loss``), evaluates on a held-out
    validation split after every epoch, checkpoints the best model by
    validation loss with early stopping (``config.patience``), then
    reloads the best checkpoint and reports test-set performance.

    Args:
        verbose: if True, print per-batch training progress.

    NOTE(review): relies on module-level names (``read_dataset``,
    ``train_test_split``, ``NetRegression``, ``config``, ``DEVICE``,
    ``SEED``, ``uncond_fair_loss``, ``calc_accuracy``,
    ``evaluate_model``) assumed to be defined elsewhere in this file.
    """
    train, train_prot, test, test_prot = read_dataset(name='adult')
    x_train, x_val, y_train, y_val, prot_train, prot_val = train_test_split(
        train.drop(['Target'], axis=1),
        train['Target'],
        train_prot,
        test_size=0.2,
        random_state=SEED)

    # All feature columns except 'Target'.
    input_size = train.shape[1] - 1
    num_classes = 2

    model = NetRegression(input_size, num_classes, arch=[120, 100]).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=config.weight_reg)

    train_data = torch.tensor(x_train.values).float()
    train_target = torch.tensor(y_train.values).long()
    train_protect = torch.tensor(prot_train).float()
    train_tensor = data_utils.TensorDataset(train_data, train_target,
                                            train_protect)
    train_loader = data_utils.DataLoader(dataset=train_tensor,
                                         batch_size=config.batch_size,
                                         shuffle=True)

    best_loss = np.inf
    training_patience = 0
    # Anomaly detection helps surface NaN/inf gradients from the custom
    # fairness loss at the cost of slower backward passes.
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(config.num_epochs):
            model.train()
            for i, (x, y, a) in enumerate(train_loader):
                x, y, a = x.to(DEVICE), y.to(DEVICE), a.to(DEVICE)

                optimiser.zero_grad()
                outputs = model(x)
                pred_loss = criterion(outputs, y)

                # Only the protected attribute and the model outputs feed
                # the unconditional fairness penalty.  BUGFIX: the previous
                # version also reassigned y = unsqueeze(y, 1).double() here,
                # which was never consumed by the fair loss but corrupted
                # the labels passed to calc_accuracy below — comparing an
                # argmax against a (batch, 1) tensor broadcasts to
                # (batch, batch) and yields a wrong accuracy.  Keep y as
                # the original (batch,) long labels.
                a = torch.unsqueeze(a, 1)
                fair_loss = uncond_fair_loss(a, outputs)

                loss = pred_loss + config.fairness_reg * fair_loss
                loss.backward()
                optimiser.step()

                if verbose and i % 20 == 0:
                    acc = calc_accuracy(outputs, y)
                    print(
                        'Epoch: [%d/%d], Batch: [%d/%d], Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Accuracy: %.4f'
                        % (epoch + 1, config.num_epochs, i,
                           len(x_train) // config.batch_size, loss.item(),
                           pred_loss.item(), fair_loss.item(), acc))

            # Per-epoch validation drives checkpointing and early stopping.
            val_results = evaluate_model(model,
                                         criterion,
                                         x_val,
                                         y_val,
                                         prot_val,
                                         type='classification',
                                         fairness='dp')
            print(
                '\t Validation Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.6f'
                %
                (val_results['loss'], val_results['accuracy'],
                 val_results['deo'], val_results['di'], val_results['cocco']))

            if val_results['loss'] < best_loss:
                best_loss = val_results['loss']
                training_patience = 0
                torch.save(model.state_dict(), config.save_model_path)
            else:
                training_patience += 1
            if training_patience == config.patience:
                break

    # Restore the best-by-validation-loss checkpoint before testing.
    print('\nTraining Complete, loading best model')
    model.load_state_dict(
        torch.load(config.save_model_path, map_location=torch.device(DEVICE)))
    test_results = evaluate_model(model,
                                  criterion,
                                  test.drop(['Target'], axis=1),
                                  test['Target'],
                                  test_prot,
                                  type='classification',
                                  fairness='dp')
    print(
        '\t Test Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.6f'
        % (test_results['loss'], test_results['accuracy'], test_results['deo'],
           test_results['di'], test_results['cocco']))
# Example no. 2 (score: 0)
def train(verbose: bool = True) -> None:
    """Train a fair regressor on the 'crime' dataset.

    Optimises MSE plus a conditional fairness penalty
    (``config.fairness_reg * cond_fair_loss``) where the protected
    attributes are the four race-percentage columns.  Validates after
    every epoch, checkpoints the best model by validation loss with
    early stopping (``config.patience``), then reloads the checkpoint
    and reports test-set performance.

    Args:
        verbose: if True, print per-batch and per-epoch progress.

    NOTE(review): relies on module-level names (``read_dataset``,
    ``NetRegression``, ``config``, ``DEVICE``, ``cond_fair_loss``,
    ``compute_fair_cocco_cond``) assumed to be defined elsewhere in
    this file.
    """
    x_train, x_val, x_test, y_train, y_val, y_test = read_dataset(name='crime')
    input_size = x_train.shape[1]
    num_classes = 1  # single regression output

    model = NetRegression(input_size, num_classes, arch=[100, 80]).to(DEVICE)
    criterion = nn.MSELoss()
    optimiser = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=config.weight_reg)

    train_target = torch.tensor(y_train.values).float()
    train_data = torch.tensor(x_train.values).float()
    # Protected attributes: the race-percentage feature columns.  Note
    # they remain part of the model's input features as well.
    train_protect = torch.tensor(x_train[[
        'racepctblack', 'racePctWhite', 'racePctAsian', 'racePctHisp'
    ]].values).float()

    train_tensor = data_utils.TensorDataset(train_data, train_target,
                                            train_protect)
    train_loader = data_utils.DataLoader(dataset=train_tensor,
                                         batch_size=config.batch_size,
                                         shuffle=True)

    best_loss = np.inf
    training_patience = 0
    # Anomaly detection helps surface NaN/inf gradients from the custom
    # fairness loss at the cost of slower backward passes.
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(config.num_epochs):
            model.train()
            for i, (x, y, a) in enumerate(train_loader):
                x, y, a = x.to(DEVICE), y.to(DEVICE), a.to(DEVICE)

                optimiser.zero_grad()
                outputs = model(x)
                # Targets reshaped to (batch, 1) to match the model's
                # single-output column for MSE.
                y = torch.unsqueeze(y, 1).float()
                pred_loss = criterion(outputs, y)

                # Fairness penalty conditioned on the target value.
                fair_loss = cond_fair_loss(y, a, outputs)

                loss = pred_loss + config.fairness_reg * fair_loss
                loss.backward()
                optimiser.step()

                if verbose and i % 3 == 0:
                    print(
                        'Epoch: [%d/%d], Batch: [%d/%d], Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f'
                        % (epoch + 1, config.num_epochs, i,
                           len(x_train) // config.batch_size, loss.item(),
                           pred_loss.item(), fair_loss.item()))

            # Per-epoch validation on the full validation split (no
            # gradients needed).  val_loss is read after this block to
            # drive checkpointing and early stopping.
            with torch.no_grad():
                model.eval()
                val_target = torch.tensor(y_val.values).float().to(DEVICE)
                val_data = torch.tensor(x_val.values).float().to(DEVICE)

                a = torch.tensor(x_val[[
                    'racepctblack', 'racePctWhite', 'racePctAsian',
                    'racePctHisp'
                ]].values).float().to(DEVICE)
                outputs = model(val_data)
                y = torch.unsqueeze(val_target, 1).float()

                pred_loss = criterion(outputs, y)
                fair_loss = cond_fair_loss(y, a, outputs)
                val_loss = pred_loss + config.fairness_reg * fair_loss
                # Fair-COCCO is reported for monitoring only; it does not
                # enter the selection criterion below.
                fair_cocco_score = compute_fair_cocco_cond(y, a, outputs)

                if verbose:
                    print(
                        '\t Validation Performance - Total Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Fair-COCCO: %.4f'
                        % (val_loss.item(), pred_loss.item(), fair_loss.item(),
                           fair_cocco_score))

            if val_loss.item() < best_loss:
                best_loss = val_loss.item()
                training_patience = 0
                torch.save(model.state_dict(), config.save_model_path)
            else:
                training_patience += 1
            if training_patience == config.patience:
                break

    # Restore the best-by-validation-loss checkpoint before testing.
    model.load_state_dict(
        torch.load(config.save_model_path, map_location=torch.device(DEVICE)))
    with torch.no_grad():
        model.eval()
        test_target = torch.tensor(y_test.values).float().to(DEVICE)
        test_data = torch.tensor(x_test.values).float().to(DEVICE)

        a = torch.tensor(x_test[[
            'racepctblack', 'racePctWhite', 'racePctAsian', 'racePctHisp'
        ]].values).float().to(DEVICE)
        outputs = model(test_data)
        y = torch.unsqueeze(test_target, 1).float()

        pred_loss = criterion(outputs, y)
        fair_loss = cond_fair_loss(y, a, outputs)
        test_loss = pred_loss + config.fairness_reg * fair_loss
        fair_cocco_score = compute_fair_cocco_cond(y, a, outputs)

        print(
            '\t [Test set] Joint Performance - Total Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Fair-COCCO Score: %.4f'
            % (test_loss.item(), pred_loss.item(), fair_loss.item(),
               fair_cocco_score.item()))