def model_sensitivity_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of private
    prediction using the model sensitivity method.

    Note: This algorithm only guarantees privacy for models with convex losses.
    """
    assert args.model == "linear", f"Model {args.model} not supported."

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()
    model = modeling.initialize_model(num_features, num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()
    regularized_criterion = modeling.add_l2_regularization(
        criterion, model, args.weight_decay)

    # train classifier:
    logging.info("Training non-private classifier...")
    modeling.train_model(model, data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perturb model parameters:
    logging.info("Applying model sensitivity method...")
    scale = sensitivity_scale(args.epsilon, args.delta, args.weight_decay,
                              criterion, num_samples, args.noise_dist)
    param = modeling.get_parameter_vector(model)
    mean = torch.zeros_like(param)
    noise_dist = "gaussian" if args.noise_dist in ["gaussian", "advanced_gaussian"] \
        else args.noise_dist
    perturbation = getattr(noise, noise_dist)(mean, scale)
    with torch.no_grad():
        param.add_(perturbation)
        modeling.set_parameter_vector(model, param)

    # perform inference on both training and test set:
    logging.info("Performing inference with perturbed predictor...")
    predictions = {
        split: modeling.test_model(model, data_split).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
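
# The scale used above comes from the repo's `sensitivity_scale` helper. For
# reference, a minimal sketch of the underlying calculation for the Gaussian
# case is given below. It assumes the standard output-perturbation bound of
# Chaudhuri et al. (2011) -- the minimizer of an L2-regularized ERM objective
# with an L-Lipschitz convex loss has L2 sensitivity at most 2L / (n * lambda)
# -- combined with the classical Gaussian mechanism. The function name, the
# `lipschitz` argument, and the parameterization as a standard deviation are
# illustrative assumptions, not the repo's API (the actual helper may, e.g.,
# parameterize the noise as a precision instead).
def _gaussian_sensitivity_scale_sketch(epsilon, delta, weight_decay,
                                       num_samples, lipschitz=1.0):
    import math
    assert delta > 0, "The Gaussian mechanism requires delta > 0."
    # L2 sensitivity of the regularized ERM minimizer:
    sensitivity = 2.0 * lipschitz / (num_samples * weight_decay)
    # classical Gaussian mechanism calibration (Dwork & Roth, 2014):
    return sensitivity * math.sqrt(2.0 * math.log(1.25 / delta)) / epsilon
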
def dpsgd_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of private
    prediction using the differentially private SGD training method of
    Abadi et al. (2016).
    """

    # assertions:
    if args.optimizer != "sgd":
        raise ValueError(
            f"DP-SGD does not work with {args.optimizer} optimizer.")
    if args.delta <= 0.:
        raise ValueError(
            f"Specified delta must be positive (not {args.delta}).")

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples = data["train"]["features"].size(0)
    num_features = data["train"]["features"].size(1)
    model = modeling.initialize_model(num_features, num_classes,
                                      model=args.model, device=args.device)
    regularized_criterion = modeling.add_l2_regularization(
        nn.CrossEntropyLoss(), model, args.weight_decay)

    # compute standard deviation of noise to add to gradient:
    std, eps = dpsgd_privacy.compute_noise_multiplier(args.epsilon,
                                                      args.delta,
                                                      num_samples,
                                                      args.batch_size,
                                                      args.num_epochs)
    logging.info(f"DP-SGD with noise multiplier (sigma) of {std}.")
    logging.info(f"Epsilon error is {abs(eps - args.epsilon):.5f}.")

    # convert model to make differentially private gradient updates:
    model = modeling.privatize_model(model, args.clip, std)

    # train classifier:
    logging.info("Training classifier using private SGD...")
    augmentation = (args.model != "linear")
    modeling.train_model(model, data["train"],
                         optimizer=args.optimizer,
                         criterion=regularized_criterion,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         momentum=0.0,
                         use_lr_scheduler=args.use_lr_scheduler,
                         augmentation=augmentation,
                         visualizer=visualizer,
                         title=title)

    # convert model back to "regular" model:
    model = modeling.unprivatize_model(model)

    # perform inference on both training and test set:
    logging.info("Performing inference with DP-SGD predictor...")
    predictions = {
        split: modeling.test_model(model, data_split,
                                   augmentation=augmentation).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
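
# `modeling.privatize_model` is expected to implement the clip-and-noise
# gradient update of Abadi et al. (2016). A minimal sketch of that update on a
# batch of per-example gradients is shown below for reference; the function
# name and tensor layout are illustrative assumptions, not the repo's API.
def _dpsgd_update_sketch(per_example_grads, clip, std):
    """`per_example_grads` has shape (batch_size, num_params). Clips each
    per-example gradient to L2 norm `clip`, adds Gaussian noise with standard
    deviation `std * clip` to the sum, and returns the noisy average."""
    norms = per_example_grads.norm(dim=1, keepdim=True)
    clipped = per_example_grads * (clip / norms).clamp(max=1.0)
    noise = torch.randn(per_example_grads.size(1)) * std * clip
    return (clipped.sum(dim=0) + noise) / per_example_grads.size(0)
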
def logit_sensitivity_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the logit
    sensitivity method. Returns a `dict` containing the `predictions` for the
    training and test data.

    Note: This algorithm only guarantees privacy for models with convex losses.
    """
    assert args.model == "linear", f"Model {args.model} not supported."

    # an unspecified inference budget means we try many values:
    if args.inference_budget == -1:
        inference_budgets = INFERENCE_BUDGETS
    else:
        inference_budgets = [args.inference_budget]

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()
    model = modeling.initialize_model(num_features, num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()
    regularized_criterion = modeling.add_l2_regularization(
        criterion, model, args.weight_decay)

    # train classifier:
    logging.info("Training non-private classifier...")
    modeling.train_model(model, data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perform inference on both training and test set:
    logging.info("Performing inference with private predictor...")
    predictions = {}
    for split in data.keys():
        if split not in predictions:
            predictions[split] = {}
        for inference_budget in inference_budgets:

            # account for the budget in the noise scale:
            scale = sensitivity_scale(args.epsilon / float(inference_budget),
                                      args.delta / float(inference_budget),
                                      args.weight_decay, criterion,
                                      num_samples, args.noise_dist)
            if args.delta > 0:
                # linearly search for the optimal noise scale under advanced
                # composition:
                del_primes = torch.linspace(0, args.delta, 1000)[1:-1]
                ind_eps_del = [
                    advanced_compose(args.epsilon, args.delta,
                                     inference_budget, dp)
                    for dp in del_primes
                ]
                scales = [
                    sensitivity_scale(epsilon, delta, args.weight_decay,
                                      criterion, num_samples, args.noise_dist)
                    for epsilon, delta in ind_eps_del
                ]
                # for small budgets, the naive scale may be better:
                scale = max(max(scales), scale)

            # make private predictions:
            noise_dist = "gaussian" if args.noise_dist in ["gaussian", "advanced_gaussian"] \
                else args.noise_dist
            preds = modeling.test_model(model, data[split])
            mean = torch.zeros_like(preds).T
            preds += getattr(noise, noise_dist)(mean, scale).T

            # store private predictions:
            predictions[split][inference_budget] = preds.argmax(dim=1)

    # return predictions:
    return predictions
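
# `advanced_compose` converts a total (epsilon, delta) budget into a per-query
# budget for `inference_budget` queries. For reference, a minimal sketch based
# on the advanced composition theorem of Dwork, Rothblum & Vadhan (2010) is
# shown below: k-fold composition of (eps, delta)-DP mechanisms satisfies
# (eps * sqrt(2k * log(1/delta')) + k * eps * (exp(eps) - 1), k * delta + delta')-DP,
# and the (monotone) bound on eps is inverted by bisection. The function name
# and return convention are illustrative assumptions, not the repo's API.
def _advanced_compose_sketch(eps_total, delta_total, k, delta_prime):
    import math
    delta = (delta_total - delta_prime) / k  # per-query delta
    lo, hi = 0.0, eps_total
    for _ in range(100):  # bisection on the per-query epsilon
        eps = (lo + hi) / 2.0
        composed = eps * math.sqrt(2.0 * k * math.log(1.0 / delta_prime)) \
            + k * eps * (math.exp(eps) - 1.0)
        if composed > eps_total:
            hi = eps
        else:
            lo = eps
    return lo, delta
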
def loss_perturbation_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the private
    prediction algorithms of Chaudhuri et al. (2011) / Kifer et al. (2012),
    generalized to the multi-class setting. Returns a `dict` containing the
    `predictions` for the training and test data.

    Note: This algorithm only guarantees privacy under the following
    assumptions:
    - The loss is strictly convex and has a continuous Hessian.
    - The model is linear.
    - The inputs have a 2-norm less than or equal to 1.
    - The Lipschitz constant of the loss function and the spectral norm of its
      Hessian are bounded.
    """
    assert args.model == "linear", f"Model {args.model} not supported."
    assert args.noise_dist != "advanced_gaussian", \
        "Advanced Gaussian method not supported for loss perturbation."

    # get dataset properties:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()

    # initialize model and criterion:
    model = modeling.initialize_model(num_features, num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()
    precision, weight_decay = loss_perturbation_params(
        args.epsilon, args.delta, args.noise_dist, criterion,
        num_samples, num_classes)
    weight_decay = max(weight_decay, args.weight_decay)

    # sample loss perturbation vector:
    param = modeling.get_parameter_vector(model)
    mean = torch.zeros_like(param)
    perturbation = getattr(noise, args.noise_dist)(mean, precision)
    perturbations = [torch.zeros_like(p) for p in model.parameters()]
    modeling.set_parameter_vector(perturbations, perturbation)

    # closure implementing the loss-perturbation criterion:
    def loss_perturbation_criterion(predictions, targets):
        loss = criterion(predictions, targets)
        for param, perturb in zip(model.parameters(), perturbations):
            loss += ((param * perturb).sum() / num_samples)
        return loss

    # add L2-regularizer to the loss:
    regularized_criterion = modeling.add_l2_regularization(
        loss_perturbation_criterion, model, weight_decay)

    # train classifier:
    logging.info("Training classifier with loss perturbation...")
    modeling.train_model(model, data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perform inference on both training and test set:
    logging.info("Performing inference with loss-perturbed predictor...")
    predictions = {
        split: model(data_split["features"]).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
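
# The term added inside `loss_perturbation_criterion` is linear in the model
# parameters: its gradient is the constant vector `perturbation / n`, so it
# shifts the ERM objective without destroying the convexity that the analyses
# of Chaudhuri et al. (2011) and Kifer et al. (2012) rely on. A self-contained
# illustration on a toy linear model (all names below are illustrative, not
# part of the module):
def _loss_perturbation_gradient_check():
    toy = nn.Linear(4, 3)
    num_samples = 100
    perturbations = [torch.randn_like(p) for p in toy.parameters()]
    extra = sum((p * b).sum() for p, b in zip(toy.parameters(), perturbations))
    extra = extra / num_samples
    grads = torch.autograd.grad(extra, list(toy.parameters()))
    # the gradient of the perturbation term equals b / n for every parameter,
    # independent of the parameter values themselves:
    for g, b in zip(grads, perturbations):
        assert torch.allclose(g, b / num_samples)
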
def subsagg_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the private
    prediction algorithm of Dwork & Feldman (2018). Returns a `dict` containing
    the `predictions` for the training and test data.
    """

    # an unspecified inference budget means we try many values:
    if args.inference_budget == -1:
        inference_budgets = INFERENCE_BUDGETS
    else:
        inference_budgets = [args.inference_budget]

    # split training set into disjoint subsets:
    data["split_train"] = split_dataset(data["train"], args.num_models)

    # train all classifiers:
    logging.info(f"Training {args.num_models} disjoint classifiers...")
    augmentation = (args.model != "linear")
    models = [None] * args.num_models
    for idx in range(args.num_models):

        # initialize model:
        logging.info(f" => training model {idx + 1} of {args.num_models}:")
        num_classes = int(data["train"]["targets"].max()) + 1
        num_features = data["split_train"][idx]["features"].size(1)
        models[idx] = modeling.initialize_model(num_features, num_classes,
                                                model=args.model,
                                                device=args.device)

        # train using L2-regularized loss:
        regularized_criterion = modeling.add_l2_regularization(
            nn.CrossEntropyLoss(), models[idx], args.weight_decay)
        modeling.train_model(models[idx], data["split_train"][idx],
                             criterion=regularized_criterion,
                             optimizer=args.optimizer,
                             num_epochs=args.num_epochs,
                             learning_rate=args.learning_rate,
                             batch_size=args.batch_size,
                             augmentation=augmentation,
                             visualizer=visualizer,
                             title=title)

    # clean up:
    del data["split_train"]

    # perform inference on both training and test set:
    logging.info("Performing inference with private predictor...")
    predictions = {}
    for split in data.keys():

        # compute predictions of each model:
        batch_size = data[split]["targets"].size(0) \
            if args.model == "linear" else 128
        preds = [
            modeling.test_model(model, data[split],
                                augmentation=augmentation,
                                batch_size=batch_size)
            for model in models
        ]
        preds = [pred.argmax(dim=1) for pred in preds]
        preds = torch.stack(preds, dim=1)

        # compute private predictions:
        if split not in predictions:
            predictions[split] = {}
        for inference_budget in inference_budgets:

            # privacy parameter must be corrected for the inference budget:
            epsilon = args.epsilon / float(inference_budget)
            if args.delta > 0:
                eps, _ = advanced_compose(args.epsilon, args.delta,
                                          inference_budget, args.delta)
                epsilon = max(eps, epsilon)

            # compute and store private predictions:
            predictions[split][inference_budget] = \
                private_prediction(preds, epsilon=epsilon)

    # return predictions:
    return predictions
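
# `private_prediction` aggregates the models' votes under epsilon-DP. Because
# the sub-models are trained on disjoint splits, changing one training example
# changes at most one vote per query, so a report-noisy-max over the vote
# histogram (Dwork & Roth, 2014) suffices. A minimal sketch is shown below;
# the function name and the Laplace calibration are illustrative assumptions,
# not necessarily the repo's implementation.
def _private_prediction_sketch(preds, epsilon):
    """`preds` is a LongTensor of shape (num_examples, num_models) holding
    each model's predicted class; returns one private prediction per
    example."""
    num_classes = int(preds.max()) + 1
    votes = torch.zeros(preds.size(0), num_classes)
    votes.scatter_add_(1, preds, torch.ones(preds.size()))
    # each vote count has sensitivity 1; Laplace(2 / epsilon) noise on the
    # counts followed by an argmax is a standard epsilon-DP calibration:
    noisy = votes + torch.distributions.Laplace(0.0, 2.0 / epsilon) \
        .sample(votes.size())
    return noisy.argmax(dim=1)
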