def kkt_setup(target_theta, target_bias, X_train, Y_train, X_test, Y_test,
              dataset_name, percentile, loss_percentile, model, model_grad,
              class_map, use_slab, use_loss):

    clean_grad_at_target_theta, clean_bias_grad_at_target_theta = model_grad(
        target_theta, target_bias, X_train, Y_train)

    losses_at_target = upper_bounds.indiv_hinge_losses(target_theta,
                                                       target_bias, X_train,
                                                       Y_train)

    # Support vectors are exactly the training points with positive hinge loss.
    sv_indices = losses_at_target > 0

    _, sv_centroids, _, sv_sphere_radii, _ = data.get_data_params(
        X_train[sv_indices, :], Y_train[sv_indices], percentile=percentile)

    # Cap each class's loss at the loss_percentile-th percentile of the
    # clean losses under the target parameters.
    max_losses = [0, 0]
    for y in set(Y_train):
        max_losses[class_map[y]] = np.percentile(
            losses_at_target[Y_train == y], loss_percentile)

    print('Max losses are: %s' % max_losses)
    model.coef_ = target_theta.reshape((1, -1))
    model.intercept_ = target_bias

    print('If we could get our targeted theta exactly:')
    print('Train            : %.3f' % model.score(X_train, Y_train))
    print('Test (overall)   : %.3f' % model.score(X_test, Y_test))

    two_class_kkt = upper_bounds.TwoClassKKT(
        clean_grad_at_target_theta.shape[0],
        dataset_name=dataset_name,
        X=X_train,
        use_slab=use_slab,
        constrain_max_loss=use_loss)

    target_bias_grad = clean_bias_grad_at_target_theta

    return two_class_kkt, clean_grad_at_target_theta, target_bias_grad, max_losses
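For context, a minimal sketch of how this routine might be invoked. The hinge-gradient helper, the dense-feature assumption, and the class_map convention are illustrative assumptions, not part of this excerpt.

import numpy as np
from sklearn.svm import LinearSVC

# Hypothetical hinge-loss gradient matching the model_grad interface
# expected above: returns (grad wrt theta, grad wrt bias) of the mean loss.
def hinge_grad(theta, bias, X, Y):
    n = X.shape[0]
    margins = Y * (X.dot(theta) + bias)
    active = margins < 1                    # points with positive hinge loss
    grad_theta = -(Y[active, None] * X[active, :]).sum(axis=0) / n
    grad_bias = -Y[active].sum() / n
    return grad_theta, grad_bias

model = LinearSVC(loss='hinge')
model.fit(X_train, Y_train)
target_theta = model.coef_.reshape(-1)      # stand-in for an attack's target
target_bias = model.intercept_[0]

two_class_kkt, grad_theta, grad_bias, max_losses = kkt_setup(
    target_theta, target_bias, X_train, Y_train, X_test, Y_test,
    dataset_name='mnist_17', percentile=70, loss_percentile=90,
    model=model, model_grad=hinge_grad, class_map={-1: 0, 1: 1},
    use_slab=True, use_loss=True)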
Example #2
def find_feasible_label_flips_in_sphere(X, Y, percentile):
    class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
        X, Y, percentile=percentile)

    sphere_dists_flip = compute_dists_under_Q(X,
                                              -Y,
                                              Q=None,
                                              subtract_from_l2=False,
                                              centroids=centroids,
                                              class_map=class_map,
                                              norm=2)

    feasible_flipped_mask = np.zeros(X.shape[0], dtype=bool)

    for y in set(Y):
        class_idx_flip = class_map[-y]
        sphere_radius_flip = sphere_radii[class_idx_flip]

        feasible_flipped_mask[Y == y] = (sphere_dists_flip[Y == y] <=
                                         sphere_radius_flip)

    return feasible_flipped_mask
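To make the geometry concrete, here is a self-contained toy version of the same check, with the Q=None, norm=2 case of compute_dists_under_Q inlined as a plain L2 distance to the opposite class's centroid (a sketch, not the repo's implementation):

import numpy as np

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2) + [2.0, 0.0],   # class +1 cluster
               rng.randn(50, 2) - [2.0, 0.0]])  # class -1 cluster
Y = np.concatenate([np.ones(50), -np.ones(50)])

class_map = {-1: 0, 1: 1}
centroids = np.array([X[Y == -1].mean(axis=0),
                      X[Y == 1].mean(axis=0)])

# Sphere radii: 70th percentile of each class's distances to its own centroid.
radii = np.zeros(2)
for y in (-1, 1):
    d = np.linalg.norm(X[Y == y] - centroids[class_map[y]], axis=1)
    radii[class_map[y]] = np.percentile(d, 70)

# Flipping x from class y to -y is feasible if x already lies inside
# the sphere of class -y.
feasible = np.zeros(X.shape[0], dtype=bool)
for y in (-1, 1):
    dists = np.linalg.norm(X[Y == y] - centroids[class_map[-y]], axis=1)
    feasible[Y == y] = dists <= radii[class_map[-y]]

print('%d of %d points can be label-flipped inside the sphere defense'
      % (feasible.sum(), len(feasible)))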
Example #3
    # Baseline slot 0 corresponds to epsilon = 0: the clean model's losses
    # and accuracies, with no "bad" (poisoned) contribution.
    lower_avg_good_train_losses[0] = train_loss
    lower_avg_bad_train_losses[0] = 0
    lower_test_losses[0] = test_loss
    lower_overall_train_acc[0] = train_acc
    lower_good_train_acc[0] = train_acc
    lower_bad_train_acc[0] = 0
    lower_test_acc[0] = test_acc
    lower_params_norm_sq[0] = params_norm_sq
    lower_weight_decays[0] = weight_decay

# This is a hack: we subsequently do randomized rounding on the
# attack points, which pushes them out of the feasible set, so we
# set the percentile to a conservatively low value (see the
# rounding sketch after this if/else block).
if (dataset_name == 'imdb') and (percentile == 70):
    class_map, centroids, centroid_vec, sphere_radii, _ = data.get_data_params(
        X_train, Y_train, percentile=15)
    _, _, _, _, slab_radii = data.get_data_params(X_train,
                                                  Y_train,
                                                  percentile=60)
else:
    class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
        X_train, Y_train, percentile=percentile)
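A sketch of the rounding step the comment above refers to (the actual rounding code is not in this excerpt; this is one standard scheme, assuming integer-valued features such as IMDB word counts):

import numpy as np

def randomized_round(x, rng):
    # Round each coordinate up with probability equal to its fractional
    # part, so E[rounded] == x, but the added variance can push the point
    # past a tight sphere/slab threshold -- hence the conservative
    # percentiles chosen above.
    floor = np.floor(x)
    return floor + (rng.random_sample(x.shape) < (x - floor))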
# Need enough kept iterations to produce round(max(epsilons) * n) attack
# points after discarding the first num_iter_to_throw_out samples.
max_iter = num_iter_after_burnin + num_iter_to_throw_out
needed_iter = int(
    np.round(np.max(epsilons) * X_train.shape[0]) + num_iter_to_throw_out)
assert max_iter >= needed_iter, 'Not enough samples; increase max_iter to at least %s.' % needed_iter

minimizer = upper_bounds.Minimizer(use_slab=not ignore_slab)

for epsilon_idx, epsilon in enumerate(epsilons):
    pass  # loop body omitted in this excerpt

parser = argparse.ArgumentParser()
parser.add_argument('dataset_name',
                    help='One of: imdb, enron, dogfish, mnist_17')
args = parser.parse_args()
dataset_name = args.dataset_name

assert dataset_name in ['imdb', 'enron', 'dogfish', 'mnist_17']

print('=== Dataset: %s ===' % dataset_name)
epsilons = datasets.DATASET_EPSILONS[dataset_name]

X_train, Y_train, X_test, Y_test = datasets.load_dataset(dataset_name)

random_seed = 1

class_map, centroids, centroid_vec, sphere_radii, slab_radii = data.get_data_params(
    X_train, Y_train, percentile=70)

sphere_dists_flip = defenses.compute_dists_under_Q(X_train,
                                                   -Y_train,
                                                   Q=None,
                                                   subtract_from_l2=False,
                                                   centroids=centroids,
                                                   class_map=class_map,
                                                   norm=2)

slab_dists_flip = defenses.compute_dists_under_Q(X_train,
                                                 -Y_train,
                                                 Q=centroid_vec,
                                                 subtract_from_l2=False,
                                                 centroids=centroids,
                                                 class_map=class_map,
                                                 norm=2)
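For reference, a sketch of the two quantities these calls measure, treating each point as if it carried the flipped label (the internals of compute_dists_under_Q and the normalization of centroid_vec are assumptions based on the arguments above):

import numpy as np

def sphere_and_slab_scores(x, y_flip, centroids, centroid_vec, class_map):
    mu = centroids[class_map[y_flip]]
    sphere = np.linalg.norm(x - mu)               # Q=None, norm=2
    slab = np.abs(np.dot(x - mu, centroid_vec))   # Q=centroid_vec
    return sphere, slab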
Example #4
def kkt_setup(target_theta,
              target_bias,
              X_train,
              Y_train,
              X_test,
              Y_test,
              dataset_name,
              percentile,
              loss_percentile,
              model,
              model_grad,
              class_map,
              use_slab,
              use_loss,
              use_l2,
              x_pos_tuple=None,
              x_neg_tuple=None,
              model_type='svm'):

    clean_grad_at_target_theta, clean_bias_grad_at_target_theta = model_grad(
        target_theta, target_bias, X_train, Y_train)
    print(clean_bias_grad_at_target_theta.shape,
          clean_grad_at_target_theta.shape)

    if model_type == 'svm':
        losses_at_target = upper_bounds.indiv_hinge_losses(
            target_theta, target_bias, X_train, Y_train)
    elif model_type == 'lr':
        losses_at_target = upper_bounds.indiv_log_losses(
            target_theta, target_bias, X_train, Y_train)
    else:
        raise ValueError("model_type must be 'svm' or 'lr', got %r" % model_type)

    print("losses_at_target shape", losses_at_target.shape)

    if model_type == 'svm':
        sv_indices = losses_at_target > 0
    else:
        # Logistic loss is positive everywhere, so keep every training point.
        sv_indices = np.arange(X_train.shape[0])

    _, sv_centroids, _, sv_sphere_radii, _ = data.get_data_params(
        X_train[sv_indices, :], Y_train[sv_indices], percentile=percentile)

    max_losses = [0, 0]
    for y in set(Y_train):
        max_losses[class_map[y]] = np.percentile(
            losses_at_target[Y_train == y], loss_percentile)

    print('Max losses are: %s' % max_losses)
    model.coef_ = target_theta.reshape((1, -1))
    model.intercept_ = target_bias

    print('If we could get our targeted theta exactly:')
    print('Train            : %.3f' % model.score(X_train, Y_train))
    print('Test (overall)   : %.3f' % model.score(X_test, Y_test))

    if model_type == 'svm':
        two_class_kkt = upper_bounds.TwoClassKKT(
            clean_grad_at_target_theta.shape[0],
            dataset_name=dataset_name,
            X=X_train,
            use_slab=use_slab,
            constrain_max_loss=use_loss,
            use_l2=use_l2,
            x_pos_tuple=x_pos_tuple,
            x_neg_tuple=x_neg_tuple,
            model_type=model_type)
    elif model_type == 'lr':
        # we don't use the cvx solver for logistic regression model
        two_class_kkt = None
    else:
        raise NotImplementedError

    target_bias_grad = clean_bias_grad_at_target_theta

    return two_class_kkt, clean_grad_at_target_theta, target_bias_grad, max_losses
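A sketch of calling this extended version in logistic-regression mode; the gradient helper and box bounds are illustrative assumptions, and note that for model_type='lr' the returned two_class_kkt is None, since the cvx KKT object is only built for SVMs.

from scipy.special import expit
from sklearn.linear_model import LogisticRegression

# Hypothetical log-loss gradient matching the model_grad interface.
def log_loss_grad(theta, bias, X, Y):
    p = expit(-Y * (X.dot(theta) + bias))     # per-point sigmoid weights
    grad_theta = -(Y * p).dot(X) / X.shape[0]
    grad_bias = -(Y * p).mean()
    return grad_theta, grad_bias

lr = LogisticRegression()
lr.fit(X_train, Y_train)

two_class_kkt, grad_theta, grad_bias, max_losses = kkt_setup(
    lr.coef_.reshape(-1), lr.intercept_[0],
    X_train, Y_train, X_test, Y_test,
    dataset_name='enron', percentile=70, loss_percentile=90,
    model=lr, model_grad=log_loss_grad, class_map={-1: 0, 1: 1},
    use_slab=True, use_loss=True, use_l2=True,
    x_pos_tuple=(0, 1), x_neg_tuple=(0, 1),   # illustrative box bounds
    model_type='lr')
assert two_class_kkt is None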