Example #1
def model_sensitivity_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of private
    prediction using the model sensitivity method. Returns a `dict` containing
    the `predictions` for the training and test data.

    Note: This algorithm only guarantees privacy for models with convex losses.
    """
    assert args.model == "linear", f"Model {args.model} not supported."

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()
    model = modeling.initialize_model(num_features,
                                      num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()
    regularized_criterion = modeling.add_l2_regularization(
        criterion, model, args.weight_decay)

    # train classifier:
    logging.info("Training non-private classifier...")
    modeling.train_model(model,
                         data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perturb model parameters:
    logging.info("Applying model sensitivity method...")
    scale = sensitivity_scale(args.epsilon, args.delta, args.weight_decay,
                              criterion, num_samples, args.noise_dist)
    param = modeling.get_parameter_vector(model)
    mean = torch.zeros_like(param)
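    # "advanced_gaussian" changes only how `scale` is computed above; the
    # perturbation itself is still drawn from a Gaussian: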
    noise_dist = "gaussian" if args.noise_dist in ["gaussian", "advanced_gaussian"] \
        else args.noise_dist
    perturbation = getattr(noise, noise_dist)(mean, scale)

    with torch.no_grad():
        param.add_(perturbation)
    modeling.set_parameter_vector(model, param)

    # perform inference on both training and test set:
    logging.info("Performing inference with perturbed predictor...")
    predictions = {
        split: modeling.test_model(model, data_split).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
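
A minimal usage sketch (hypothetical: the `argparse.Namespace` fields below and the `load_data` loader are illustrative stand-ins, not part of this codebase):

import argparse

args = argparse.Namespace(
    model="linear", device="cpu", optimizer="sgd", weight_decay=1e-3,
    num_epochs=20, learning_rate=0.1, batch_size=64,
    epsilon=1.0, delta=1e-5, noise_dist="gaussian")
data = load_data()  # hypothetical loader: {"train": ..., "test": ...} with "features"/"targets" tensors
predictions = model_sensitivity_method(data, args)
accuracy = predictions["test"].eq(data["test"]["targets"]).float().mean()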
Example #2
def dpsgd_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of private
    prediction using the differentially private SGD training method of Abadi
    et al. (2016). Returns a `dict` containing the `predictions` for the
    training and test data.
    """

    # assertions:
    if args.optimizer != "sgd":
        raise ValueError(
            f"DP-SGD does not work with {args.optimizer} optimizer.")
    if args.delta <= 0.:
        raise ValueError(
            f"Specified delta must be positive (not {args.delta}).")

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples = data["train"]["features"].size(0)
    num_features = data["train"]["features"].size(1)
    model = modeling.initialize_model(num_features,
                                      num_classes,
                                      model=args.model,
                                      device=args.device)
    regularized_criterion = modeling.add_l2_regularization(
        nn.CrossEntropyLoss(), model, args.weight_decay)

    # compute standard deviation of noise to add to gradient:
    std, eps = dpsgd_privacy.compute_noise_multiplier(args.epsilon, args.delta,
                                                      num_samples,
                                                      args.batch_size,
                                                      args.num_epochs)
    logging.info(f"DP-SGD with noise multiplier (sigma) of {std}.")
    logging.info(f"Epsilon error is {abs(eps - args.epsilon):.5f}.")

    # convert model to make differentially private gradient updates:
    model = modeling.privatize_model(model, args.clip, std)

    # train classifier:
    logging.info("Training classifier using private SGD...")
    augmentation = (args.model != "linear")
    modeling.train_model(model,
                         data["train"],
                         optimizer=args.optimizer,
                         criterion=regularized_criterion,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         momentum=0.0,
                         use_lr_scheduler=args.use_lr_scheduler,
                         augmentation=augmentation,
                         visualizer=visualizer,
                         title=title)

    # convert model back to "regular" model:
    model = modeling.unprivatize_model(model)

    # perform inference on both training and test set:
    logging.info("Performing inference with DP-SGD predictor...")
    predictions = {
        split: modeling.test_model(model,
                                   data_split,
                                   augmentation=augmentation).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
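
The privatized model presumably implements the core DP-SGD update of Abadi et al. (2016): clip each per-example gradient, then add Gaussian noise scaled by the noise multiplier. A self-contained sketch of that step, independent of the `modeling` helpers here (names and shapes are illustrative):

import torch

def dp_sgd_step(per_example_grads, clip, sigma, lr, param):
    # clip each example's gradient to L2 norm at most `clip`:
    norms = per_example_grads.norm(dim=1, keepdim=True)
    clipped = per_example_grads * (clip / norms).clamp(max=1.0)
    # sum, add Gaussian noise with std sigma * clip, and average:
    noisy_sum = clipped.sum(dim=0) + sigma * clip * torch.randn_like(param)
    return param - lr * noisy_sum / per_example_grads.size(0)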
Example #3
def logit_sensitivity_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the logit
    sensitivity method. Returns a `dict` containing the `predictions` for the
    training and test data.

    Note: This algorithm only guarantees privacy for models with convex losses.
    """
    assert args.model == "linear", f"Model {args.model} not supported."

    # an unspecified inference budget means we try many values:
    if args.inference_budget == -1:
        inference_budgets = INFERENCE_BUDGETS
    else:
        inference_budgets = [args.inference_budget]

    # initialize model and criterion:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()
    model = modeling.initialize_model(num_features,
                                      num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()
    regularized_criterion = modeling.add_l2_regularization(
        criterion, model, args.weight_decay)

    # train classifier:
    logging.info("Training non-private classifier...")
    modeling.train_model(model,
                         data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perform inference on both training and test set:
    logging.info("Performing inference with private predictor...")
    predictions = {}
    for split in data.keys():
        if split not in predictions:
            predictions[split] = {}
        for inference_budget in inference_budgets:

            # account for the budget in the noise scale:
            scale = sensitivity_scale(args.epsilon / float(inference_budget),
                                      args.delta / float(inference_budget),
                                      args.weight_decay, criterion,
                                      num_samples, args.noise_dist)
            if args.delta > 0:
                # linearly search for the optimal noise scale under advanced
                # composition:
                del_primes = torch.linspace(0, args.delta, 1000)[1:-1]
                ind_eps_del = [
                    advanced_compose(args.epsilon, args.delta,
                                     inference_budget, dp) for dp in del_primes
                ]
                scales = [
                    sensitivity_scale(epsilon, delta, args.weight_decay,
                                      criterion, num_samples, args.noise_dist)
                    for epsilon, delta in ind_eps_del
                ]
                # for small budgets the naive scale may be better:
                scale = max(max(scales), scale)

            # make private predictions:
            noise_dist = "gaussian" if args.noise_dist in ["gaussian", "advanced_gaussian"] \
                else args.noise_dist
            preds = modeling.test_model(model, data[split])
            mean = torch.zeros_like(preds).T
            preds += getattr(noise, noise_dist)(mean, scale).T

            # store private predictions for this inference budget:
            predictions[split][inference_budget] = preds.argmax(dim=1)

    # return predictions:
    return predictions
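
For reference, `advanced_compose` presumably inverts the advanced composition theorem of Dwork et al. (2010): k mechanisms that are each (eps, delta)-DP together satisfy (eps * sqrt(2k ln(1/delta')) + k * eps * (e^eps - 1), k * delta + delta')-DP. A hedged sketch, matching the call sites above but not necessarily the codebase's implementation, that bisects for the largest per-query eps fitting the total budget:

import math

def advanced_compose(eps_total, delta_total, k, delta_prime):
    # per-query delta after reserving delta' as composition slack:
    delta = (delta_total - delta_prime) / k
    lo, hi = 0.0, eps_total
    for _ in range(100):  # composed eps is monotone in eps, so bisect
        eps = (lo + hi) / 2.0
        composed = (eps * math.sqrt(2.0 * k * math.log(1.0 / delta_prime))
                    + k * eps * (math.exp(eps) - 1.0))
        lo, hi = (eps, hi) if composed <= eps_total else (lo, eps)
    return lo, delta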
Example #4
def loss_perturbation_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the private
    prediction algorithms of Chaudhuri et al. (2011) / Kifer et al. (2012)
    generalized to the multi-class setting. Returns a `dict` containing the
    `predictions` for the training and test data.

    Note: This algorithm only guarantees privacy under the following assumptions:
    - The loss is strictly convex and has a continuous Hessian.
    - The model is linear.
    - The inputs have a 2-norm restricted to be less than or equal to 1.
    - The Lipschitz constant of the loss function and the spectral
        norm of the Hessian must be bounded.
    """
    assert args.model == "linear", f"Model {args.model} not supported."
    assert args.noise_dist != "advanced_gaussian", \
        "Advanced Gaussian method not supported for loss perturbation."

    # get dataset properties:
    num_classes = int(data["train"]["targets"].max()) + 1
    num_samples, num_features = data["train"]["features"].size()

    # initialize model and criterion:
    model = modeling.initialize_model(num_features,
                                      num_classes,
                                      device=args.device)
    criterion = nn.CrossEntropyLoss()

    precision, weight_decay = loss_perturbation_params(args.epsilon,
                                                       args.delta,
                                                       args.noise_dist,
                                                       criterion, num_samples,
                                                       num_classes)
    weight_decay = max(weight_decay, args.weight_decay)

    # sample loss perturbation vector:
    param = modeling.get_parameter_vector(model)
    mean = torch.zeros_like(param)
    perturbation = getattr(noise, args.noise_dist)(mean, precision)
    perturbations = [torch.zeros_like(p) for p in model.parameters()]
    modeling.set_parameter_vector(perturbations, perturbation)

    # closure implementing the loss-perturbation criterion:
    def loss_perturbation_criterion(predictions, targets):
        loss = criterion(predictions, targets)
        for param, perturb in zip(model.parameters(), perturbations):
            loss += ((param * perturb).sum() / num_samples)
        return loss

    # add L2-regularizer to the loss:
    regularized_criterion = modeling.add_l2_regularization(
        loss_perturbation_criterion, model, weight_decay)

    # train classifier:
    logging.info("Training classifier with loss perturbation...")
    modeling.train_model(model,
                         data["train"],
                         criterion=regularized_criterion,
                         optimizer=args.optimizer,
                         num_epochs=args.num_epochs,
                         learning_rate=args.learning_rate,
                         batch_size=args.batch_size,
                         visualizer=visualizer,
                         title=title)

    # perform inference on both training and test set:
    logging.info("Performing inference with loss-perturbed predictor...")
    predictions = {
        split: model(data_split["features"]).argmax(dim=1)
        for split, data_split in data.items()
    }
    return predictions
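
For reference, the closure above trains on the perturbed objective of Chaudhuri et al. (2011) / Kifer et al. (2012):

J(theta) = (1/n) * sum_i loss(theta; x_i, y_i) + (lambda/2) * ||theta||^2 + (b . theta) / n

where b is the perturbation vector sampled with the precision returned by `loss_perturbation_params` and lambda is the (possibly raised) `weight_decay`; the last term is what `loss_perturbation_criterion` adds, and the L2 term comes from `add_l2_regularization`.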
Example #5
def subsagg_method(data, args, visualizer=None, title=None):
    """
    Given a dataset `data` and arguments `args`, run a full test of the private
    prediction algorithm of Dwork & Feldman (2018). Returns a `dict` containing
    the `predictions` for the training and test data.
    """

    # an unspecified inference budget means we try many values:
    if args.inference_budget == -1:
        inference_budgets = INFERENCE_BUDGETS
    else:
        inference_budgets = [args.inference_budget]

    # split training set into disjoint subsets:
    data["split_train"] = split_dataset(data["train"], args.num_models)

    # train all classifiers:
    logging.info(f"Training {args.num_models} disjoint classifiers...")
    models = [None] * args.num_models
    for idx in range(args.num_models):

        # initialize model:
        logging.info(f" => training model {idx + 1} of {args.num_models}:")
        num_classes = int(data["train"]["targets"].max()) + 1
        num_features = data["split_train"][idx]["features"].size(1)
        models[idx] = modeling.initialize_model(num_features,
                                                num_classes,
                                                model=args.model,
                                                device=args.device)

        # train using L2-regularized loss:
        regularized_criterion = modeling.add_l2_regularization(
            nn.CrossEntropyLoss(), models[idx], args.weight_decay)
        augmentation = (args.model != "linear")
        modeling.train_model(models[idx],
                             data["split_train"][idx],
                             criterion=regularized_criterion,
                             optimizer=args.optimizer,
                             num_epochs=args.num_epochs,
                             learning_rate=args.learning_rate,
                             batch_size=args.batch_size,
                             augmentation=augmentation,
                             visualizer=visualizer,
                             title=title)

    # clean up:
    del data["split_train"]

    # perform inference on both training and test set:
    logging.info("Performing inference with private predictor...")
    predictions = {}
    for split in data.keys():

        # compute predictions of each model:
        batch_size = (data[split]["targets"].size(0)
                      if args.model == "linear" else 128)
        preds = [
            modeling.test_model(
                model,
                data[split],
                augmentation=augmentation,
                batch_size=batch_size,
            ) for model in models
        ]
        preds = [pred.argmax(dim=1) for pred in preds]
        preds = torch.stack(preds, dim=1)

        # compute private predictions:
        if split not in predictions:
            predictions[split] = {}
        for inference_budget in inference_budgets:
            # privacy parameter must be corrected for inference budget:
            epsilon = args.epsilon / float(inference_budget)
            if args.delta > 0:
                eps, _ = advanced_compose(args.epsilon, args.delta,
                                          inference_budget, args.delta)
                epsilon = max(eps, epsilon)

            # compute and store private predictions:
            predictions[split][inference_budget] = \
                private_prediction(preds, epsilon=epsilon)

    # return predictions:
    return predictions
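
One plausible realization of `private_prediction` (the actual implementation in this codebase may differ) is the exponential mechanism over per-example vote counts, implemented as a Gumbel-noise argmax:

import torch

def private_prediction(preds, epsilon):
    # `preds` holds class labels with shape (num_examples, num_models);
    # count the votes each class receives per example:
    num_classes = int(preds.max()) + 1
    votes = torch.zeros(preds.size(0), num_classes)
    votes.scatter_add_(1, preds, torch.ones(preds.size()))
    # Gumbel-max with scale 2 / epsilon implements the exponential
    # mechanism on the (sensitivity-1) vote counts:
    gumbel = -torch.log(-torch.log(torch.rand_like(votes)))
    return (votes + (2.0 / epsilon) * gumbel).argmax(dim=1)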