def train(verbose=True):
    """Train a fairness-regularised classifier on the Adult dataset.

    Optimises cross-entropy plus ``config.fairness_reg`` times an
    unconditional (demographic-parity style) fairness penalty, early-stops
    on validation loss with patience ``config.patience``, reloads the best
    checkpoint and prints test-set metrics.

    Args:
        verbose: if True, print batch-level training progress.
    """
    # NOTE: locals renamed from train/test to avoid shadowing this function.
    train_df, train_prot, test_df, test_prot = read_dataset(name='adult')
    x_train, x_val, y_train, y_val, prot_train, prot_val = train_test_split(
        train_df.drop(['Target'], axis=1), train_df['Target'], train_prot,
        test_size=0.2, random_state=SEED)

    input_size = train_df.shape[1] - 1  # every column except 'Target'
    num_classes = 2
    model = NetRegression(input_size, num_classes, arch=[120, 100]).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=config.learning_rate,
                                 weight_decay=config.weight_reg)

    train_data = torch.tensor(x_train.values).float()
    train_target = torch.tensor(y_train.values).long()
    train_protect = torch.tensor(prot_train).float()
    train_tensor = data_utils.TensorDataset(train_data, train_target,
                                            train_protect)
    train_loader = data_utils.DataLoader(dataset=train_tensor,
                                         batch_size=config.batch_size,
                                         shuffle=True)

    best_loss = np.inf
    training_patience = 0
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(config.num_epochs):
            model.train()
            for i, (x, y, a) in enumerate(train_loader):
                x, y, a = x.to(DEVICE), y.to(DEVICE), a.to(DEVICE)
                optimiser.zero_grad()
                outputs = model(x)
                pred_loss = criterion(outputs, y)
                # BUGFIX: the original rebound `y` to a (N, 1) double tensor
                # here even though the fairness term only needs `a` and
                # `outputs`; the reshaped `y` then leaked into
                # calc_accuracy(outputs, y), where a (N, 1) float target can
                # silently broadcast against (N,) predictions and corrupt the
                # reported accuracy. Keep `y` as the original (N,) long
                # labels and unsqueeze only the protected attribute.
                a_col = torch.unsqueeze(a, 1)
                fair_loss = uncond_fair_loss(a_col, outputs)
                loss = pred_loss + config.fairness_reg * fair_loss
                loss.backward()
                optimiser.step()
                if verbose and i % 20 == 0:
                    acc = calc_accuracy(outputs, y)
                    print(
                        'Epoch: [%d/%d], Batch: [%d/%d], Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Accuracy: %.4f'
                        % (epoch + 1, config.num_epochs, i,
                           len(x_train) // config.batch_size, loss.item(),
                           pred_loss.item(), fair_loss.item(), acc))

            # Validate once per epoch; early-stop on total validation loss.
            val_results = evaluate_model(model, criterion, x_val, y_val,
                                         prot_val, type='classification',
                                         fairness='dp')
            print(
                '\t Validation Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.6f'
                % (val_results['loss'], val_results['accuracy'],
                   val_results['deo'], val_results['di'],
                   val_results['cocco']))
            if val_results['loss'] < best_loss:
                best_loss = val_results['loss']
                training_patience = 0
                torch.save(model.state_dict(), config.save_model_path)
            else:
                training_patience += 1
                if training_patience == config.patience:
                    break

    print('\nTraining Complete, loading best model')
    model.load_state_dict(
        torch.load(config.save_model_path,
                   map_location=torch.device(DEVICE)))
    test_results = evaluate_model(model, criterion,
                                  test_df.drop(['Target'], axis=1),
                                  test_df['Target'], test_prot,
                                  type='classification', fairness='dp')
    print(
        '\t Test Performance: Loss: %.4f, Accuracy: %.4f, DEO: %.4f, DI: %.4f, Fair-COCCO: %.6f'
        % (test_results['loss'], test_results['accuracy'],
           test_results['deo'], test_results['di'], test_results['cocco']))
def train(verbose=True):
    """Train a fairness-regularised regressor on the Crime dataset.

    The objective is MSE plus ``config.fairness_reg`` times a conditional
    fairness penalty computed over four race-proportion columns of the
    input. Training early-stops when total validation loss fails to improve
    for ``config.patience`` consecutive epochs; the best checkpoint is then
    reloaded and scored on the test split.

    Args:
        verbose: if True, print batch-level and validation progress.
    """
    x_train, x_val, x_test, y_train, y_val, y_test = read_dataset(name='crime')

    # Sensitive attributes are a subset of the input feature columns.
    prot_cols = ['racepctblack', 'racePctWhite', 'racePctAsian', 'racePctHisp']

    model = NetRegression(x_train.shape[1], 1, arch=[100, 80]).to(DEVICE)
    criterion = nn.MSELoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=config.learning_rate,
                                 weight_decay=config.weight_reg)

    dataset = data_utils.TensorDataset(
        torch.tensor(x_train.values).float(),
        torch.tensor(y_train.values).float(),
        torch.tensor(x_train[prot_cols].values).float())
    loader = data_utils.DataLoader(dataset=dataset,
                                   batch_size=config.batch_size, shuffle=True)

    def _assess(features, targets):
        # Score one held-out split; caller wraps this in no_grad()/eval().
        data = torch.tensor(features.values).float().to(DEVICE)
        prot = torch.tensor(features[prot_cols].values).float().to(DEVICE)
        tgt = torch.unsqueeze(
            torch.tensor(targets.values).float().to(DEVICE), 1).float()
        preds = model(data)
        p_loss = criterion(preds, tgt)
        f_loss = cond_fair_loss(tgt, prot, preds)
        return (p_loss + config.fairness_reg * f_loss, p_loss, f_loss,
                compute_fair_cocco_cond(tgt, prot, preds))

    best_loss = np.inf
    training_patience = 0
    with torch.autograd.set_detect_anomaly(True):
        for epoch in range(config.num_epochs):
            model.train()
            for batch_idx, (xb, yb, ab) in enumerate(loader):
                xb, yb, ab = xb.to(DEVICE), yb.to(DEVICE), ab.to(DEVICE)
                optimiser.zero_grad()
                preds = model(xb)
                yb = torch.unsqueeze(yb, 1).float()
                p_loss = criterion(preds, yb)
                f_loss = cond_fair_loss(yb, ab, preds)
                total = p_loss + config.fairness_reg * f_loss
                total.backward()
                optimiser.step()
                if verbose and batch_idx % 3 == 0:
                    print(
                        'Epoch: [%d/%d], Batch: [%d/%d], Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f'
                        % (epoch + 1, config.num_epochs, batch_idx,
                           len(x_train) // config.batch_size, total.item(),
                           p_loss.item(), f_loss.item()))

            # Per-epoch validation drives checkpointing and early stopping.
            with torch.no_grad():
                model.eval()
                val_total, val_pred, val_fair, val_cocco = _assess(x_val,
                                                                   y_val)
            if verbose:
                print(
                    '\t Validation Performance - Total Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Fair-COCCO: %.4f'
                    % (val_total.item(), val_pred.item(), val_fair.item(),
                       val_cocco))
            if val_total.item() < best_loss:
                best_loss = val_total.item()
                training_patience = 0
                torch.save(model.state_dict(), config.save_model_path)
            else:
                training_patience += 1
                if training_patience == config.patience:
                    break

    # Restore the best checkpoint before the final test-set report.
    model.load_state_dict(
        torch.load(config.save_model_path,
                   map_location=torch.device(DEVICE)))
    with torch.no_grad():
        model.eval()
        test_total, test_pred, test_fair, test_cocco = _assess(x_test, y_test)
    print(
        '\t [Test set] Joint Performance - Total Loss: %.4f, Pred Loss: %.4f, Fair Loss: %.4f, Fair-COCCO Score: %.4f'
        % (test_total.item(), test_pred.item(), test_fair.item(),
           test_cocco.item()))