Example #1
def main(_):
    rng = random.PRNGKey(0)

    # Load MNIST dataset
    train_images, train_labels, test_images, test_labels = datasets.mnist()

    batch_size = 128
    batch_shape = (-1, 28, 28, 1)
    num_train = train_images.shape[0]
    num_complete_batches, leftover = divmod(num_train, batch_size)
    num_batches = num_complete_batches + bool(leftover)

    train_images = np.reshape(train_images, batch_shape)
    test_images = np.reshape(test_images, batch_shape)

    def data_stream():
        rng = npr.RandomState(0)
        while True:
            perm = rng.permutation(num_train)
            for i in range(num_batches):
                batch_idx = perm[i * batch_size:(i + 1) * batch_size]
                yield train_images[batch_idx], train_labels[batch_idx]

    batches = data_stream()

    # Model, loss, and accuracy functions
    init_random_params, predict = stax.serial(
        stax.Conv(32, (8, 8), strides=(2, 2), padding='SAME'), stax.Relu,
        stax.Conv(128, (6, 6), strides=(2, 2), padding='VALID'), stax.Relu,
        stax.Conv(128, (5, 5), strides=(1, 1), padding='VALID'), stax.Flatten,
        stax.Dense(128), stax.Relu, stax.Dense(10))

    def loss(params, batch):
        inputs, targets = batch
        preds = predict(params, inputs)
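        # targets are one-hot, so this is the softmax cross-entropy averaged
        # over every batch-and-class entry (i.e. scaled by 1/num_classes).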
        return -np.mean(logsoftmax(preds) * targets)

    def accuracy(params, batch):
        inputs, targets = batch
        target_class = np.argmax(targets, axis=1)
        predicted_class = np.argmax(predict(params, inputs), axis=1)
        return np.mean(predicted_class == target_class)

    # Instantiate an optimizer
    opt_init, opt_update, get_params = optimizers.adam(0.001)

    @jit
    def update(i, opt_state, batch):
        params = get_params(opt_state)
        return opt_update(i, grad(loss)(params, batch), opt_state)

    # Initialize model
    _, init_params = init_random_params(rng, batch_shape)
    opt_state = opt_init(init_params)
    itercount = itertools.count()

    # Training loop
    print("\nStarting training...")
    for epoch in range(FLAGS.nb_epochs):
        start_time = time.time()
        for _ in range(num_batches):
            opt_state = update(next(itercount), opt_state, next(batches))
        epoch_time = time.time() - start_time

        # Evaluate model on clean data
        params = get_params(opt_state)
        train_acc = accuracy(params, (train_images, train_labels))
        test_acc = accuracy(params, (test_images, test_labels))

        # Evaluate model on adversarial data
        model_fn = lambda images: predict(params, images)
        test_images_fgm = fast_gradient_method(model_fn, test_images,
                                               FLAGS.eps, np.inf)
        test_images_pgd = projected_gradient_descent(model_fn, test_images,
                                                     FLAGS.eps, 0.01, 40,
                                                     np.inf)
        test_acc_fgm = accuracy(params, (test_images_fgm, test_labels))
        test_acc_pgd = accuracy(params, (test_images_pgd, test_labels))

        print("Epoch {} in {:0.2f} sec".format(epoch, epoch_time))
        print("Training set accuracy: {}".format(train_acc))
        print("Test set accuracy on clean examples: {}".format(test_acc))
        print("Test set accuracy on FGM adversarial examples: {}".format(
            test_acc_fgm))
        print("Test set accuracy on PGD adversarial examples: {}".format(
            test_acc_pgd))
def projected_gradient_descent(
    model_fn,
    x,
    eps,
    eps_iter,
    nb_iter,
    norm,
    clip_min=None,
    clip_max=None,
    y=None,
    targeted=False,
    rand_init=None,
    rand_minmax=0.3,
):
    """
    This function implements either the Basic Iterative Method
    (Kurakin et al. 2016), when rand_init is falsy, or the
    Madry et al. (2017) method, when rand_init is truthy.
    Paper link (Kurakin et al. 2016): https://arxiv.org/pdf/1607.02533.pdf
    Paper link (Madry et al. 2017): https://arxiv.org/pdf/1706.06083.pdf
    :param model_fn: a callable that takes an input tensor and returns the model logits.
    :param x: input tensor.
    :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
    :param eps_iter: step size for each attack iteration
    :param nb_iter: Number of attack iterations.
    :param norm: Order of the norm (mimics NumPy). Possible values: np.inf or 2.
    :param clip_min: (optional) float. Minimum float value for adversarial example components.
    :param clip_max: (optional) float. Maximum float value for adversarial example components.
    :param y: (optional) Tensor with labels. If targeted is true, provide the target
              label. Otherwise, provide the true labels only if you'd like to use them
              when crafting adversarial samples; if left as None, model predictions are
              used as labels to avoid the "label leaking" effect (explained in this paper:
              https://arxiv.org/abs/1611.01236). Default is None.
    :param targeted: (optional) bool. Is the attack targeted or untargeted?
              Untargeted, the default, will try to make the label incorrect.
              Targeted will instead try to move in the direction of being more like y.
    :param rand_init: (optional) if truthy, start the attack from a point drawn uniformly
              at random inside the eps ball around x (Madry et al. 2017) rather than from x.
    :param rand_minmax: (optional) float. Range of the random initialization; in this
              implementation it is overridden by eps when rand_init is truthy.
    :return: a tensor for the adversarial example
    """

    assert eps_iter <= eps, (eps_iter, eps)
    if norm == 1:
        raise NotImplementedError(
            "It's not clear that FGM is a good inner loop"
            " step for PGD when norm=1, because norm=1 FGM "
            " changes only one pixel at a time. We need "
            " to rigorously test a strong norm=1 PGD "
            "before enabling this feature.")
    if norm not in [np.inf, 2]:
        raise ValueError("Norm order must be either np.inf or 2.")

    # Initialize loop variables
    if rand_init:
        rand_minmax = eps
        # Draw the starting perturbation uniformly from [-eps, eps] per
        # component (uniform takes low, high, size).
        eta = np.random.uniform(-rand_minmax, rand_minmax, size=x.shape)
    else:
        eta = np.zeros_like(x)

    # Clip eta
    eta = clip_eta(eta, norm, eps)
    adv_x = x + eta
    if clip_min is not None or clip_max is not None:
        adv_x = np.clip(adv_x, a_min=clip_min, a_max=clip_max)

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        x_labels = np.argmax(model_fn(x), 1)
        y = one_hot(x_labels, 10)

    for _ in range(nb_iter):
        adv_x = fast_gradient_method(
            model_fn,
            adv_x,
            eps_iter,
            norm,
            clip_min=clip_min,
            clip_max=clip_max,
            y=y,
            targeted=targeted,
        )

        # Clipping perturbation eta to norm norm ball
        eta = adv_x - x
        eta = clip_eta(eta, norm, eps)
        adv_x = x + eta

        # Redo the clipping.
        # FGM already did it, but subtracting and re-adding eta can add some
        # small numerical error.
        if clip_min is not None or clip_max is not None:
            adv_x = np.clip(adv_x, a_min=clip_min, a_max=clip_max)

    return adv_x
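
# The attack above relies on two helpers, clip_eta and one_hot, whose
# definitions are not shown in this example. The following is only a minimal
# sketch of the behaviour assumed here (projecting the perturbation back into
# the eps ball for the np.inf and 2 norms, and building one-hot label
# vectors); the actual helpers used by the example may differ.
import jax.numpy as np


def clip_eta(eta, norm, eps):
    """Project the perturbation eta back into the eps ball of the given norm."""
    if norm == np.inf:
        # Component-wise clipping; also works when eps is an array (ellipsoid).
        return np.clip(eta, -eps, eps)
    if norm == 2:
        # Rescale eta if its per-example l2 norm exceeds eps.
        axes = tuple(range(1, eta.ndim))
        norms = np.sqrt(np.sum(eta ** 2, axis=axes, keepdims=True))
        return eta * np.minimum(1.0, eps / np.maximum(norms, 1e-12))
    raise ValueError("norm must be np.inf or 2")


def one_hot(labels, num_classes, dtype=np.float32):
    """Turn integer class indices into one-hot vectors."""
    return np.array(labels[:, None] == np.arange(num_classes), dtype)
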
def projected_gradient_descent(
    model_fn,
    x,
    eps,
    eps_iter,
    nb_iter,
    norm,
    clip_min=None,
    clip_max=None,
    y=None,
    targeted=False,
    rand_init=None,
    rand_minmax=0.3,
):
    """
    This function implements either the Basic Iterative Method
    (Kurakin et al. 2016), when rand_init is falsy, or the
    Madry et al. (2017) method, when rand_init is truthy.
    Paper link (Kurakin et al. 2016): https://arxiv.org/pdf/1607.02533.pdf
    Paper link (Madry et al. 2017): https://arxiv.org/pdf/1706.06083.pdf
    :param model_fn: a callable that takes an input tensor and returns the model logits.
    :param x: input tensor.
    :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
              Either a float (a norm ball) or an array with the same shape as x giving a
              per-component budget (an axis-aligned ellipsoid).
    :param eps_iter: step size for each attack iteration; may likewise be an array when
              eps is an array.
    :param nb_iter: Number of attack iterations.
    :param norm: Order of the norm (mimics NumPy). Possible values: np.inf or 2.
    :param clip_min: (optional) float. Minimum float value for adversarial example components.
    :param clip_max: (optional) float. Maximum float value for adversarial example components.
    :param y: (optional) Tensor with labels. If targeted is true, provide the target
              label. Otherwise, provide the true labels only if you'd like to use them
              when crafting adversarial samples; if left as None, model predictions are
              used as labels to avoid the "label leaking" effect (explained in this paper:
              https://arxiv.org/abs/1611.01236). Default is None.
    :param targeted: (optional) bool. Is the attack targeted or untargeted?
              Untargeted, the default, will try to make the label incorrect.
              Targeted will instead try to move in the direction of being more like y.
    :param rand_init: (optional) if truthy, start the attack from a point drawn uniformly
              at random inside the eps ball around x (Madry et al. 2017) rather than from x.
    :param rand_minmax: (optional) float. Range of the random initialization; in this
              implementation it is overridden by eps when rand_init is truthy.
    :return: a tensor for the adversarial example
    """
    # A float eps defines a norm ball; an array shaped like x defines an
    # axis-aligned ellipsoid with a per-component budget.
    BALL = isinstance(eps, float)
    assert BALL or eps.shape == x.shape, \
        "eps must define an epsilon ball (float) or an ellipsoid (array shaped like x)"
    assert (not BALL) or np.array(eps_iter <= eps).all(), (eps_iter, eps)
    if norm == 1:
        raise NotImplementedError(
            "It's not clear that FGM is a good inner loop"
            " step for PGD when norm=1, because norm=1 FGM "
            " changes only one pixel at a time. We need "
            " to rigorously test a strong norm=1 PGD "
            "before enabling this feature.")
    if norm not in [np.inf, 2]:
        raise ValueError("Norm order must be either np.inf or 2.")

    # Initialize loop variables
    if rand_init:
        rand_minmax = eps
        # Draw the starting perturbation uniformly from [-eps, eps] per
        # component (uniform takes low, high, size); eps may be an array here.
        eta = np.random.uniform(-rand_minmax, rand_minmax, size=x.shape)
        eta = clip_eta(eta, norm, eps)
    else:
        eta = np.zeros_like(x)

    adv_x = x + eta
    if clip_min is not None or clip_max is not None:
        adv_x = np.clip(adv_x, a_min=clip_min, a_max=clip_max)

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        x_labels = np.argmax(model_fn(x), 1)
        y = one_hot(x_labels, 10)

    for _ in range(nb_iter):
        adv_x = fast_gradient_method(
            model_fn,
            adv_x,
            eps_iter,
            norm,
            clip_min=clip_min,
            clip_max=clip_max,
            y=y,
            targeted=targeted,
        )

        if not isinstance(eps, float) and (norm == 2):
            # Projection onto an l2 ellipsoid is not supported yet; the code
            # below is an unreached sketch of that projection.
            raise NotImplementedError
            # Projection onto the ellipsoid in l2:
            #     perturbation = Proj(x + grads) - x
            #     x_proj* = arg min_{x_proj} 0.5 * ||x_proj - y||_2^2
            #               s.t. (x_proj - c)' W (x_proj - c) <= 1
            adv_x *= eps_iter
            x_ = x.ravel()
            # We want to project y back onto the ellipsoid defined by eps
            y_ = adv_x.ravel()
            # Squared inverse of the diagonal matrix W that transforms the
            # ball into an axis-aligned ellipsoid
            w_ = 1 / (eps.ravel() ** 2 + 1e-12)

            def f_and_g(x_p):
                g_ = x_p - y_
                f_ = .5 * np.linalg.norm(g_, ord=2)**2
                return f_, g_

            def functionValIneq(x_p):
                t0 = x_p - x_
                return np.dot(t0, w_ * t0)

            def gradientIneq(x_p):
                t0 = x_p - x_
                return 2 * (w_ * t0)

            x0 = onp.random.randn(x_.shape[0])
            bnds = [(-np.inf, np.inf)] * x_.shape[0]
            constraints = ({
                'type': 'ineq',
                'fun': lambda x: -functionValIneq(x),
                'jac': lambda x: -gradientIneq(x)
            })
            result = minimize(f_and_g,
                              x0,
                              jac=True,
                              method='SLSQP',
                              bounds=bnds,
                              constraints=constraints)
            x_p = result.x
            print("\nFunction value ineq.:", functionValIneq(x_p))
            adv_x = np.reshape(x_p, x.shape)

        # Clipping perturbation eta to norm norm ball
        eta = adv_x - x
        eta = clip_eta(eta, norm, eps)
        adv_x = x + eta

        # Redo the clipping.
        # FGM already did it, but subtracting and re-adding eta can add some
        # small numerical error.
        if clip_min is not None or clip_max is not None:
            adv_x = np.clip(adv_x, a_min=clip_min, a_max=clip_max)

    return adv_x
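
# Unlike the first version, this variant also accepts a per-component eps
# array of the same shape as x, which turns the L-inf ball into an
# axis-aligned ellipsoid. The snippet below is a hedged usage sketch, assuming
# a model_fn and an image batch x as in the surrounding examples; the array
# case also assumes fast_gradient_method broadcasts an eps_iter array.
import jax.numpy as np

# Scalar eps: ordinary PGD inside an L-inf ball of radius 0.075.
adv_ball = projected_gradient_descent(model_fn, x, 0.075, 0.01, 40, np.inf)

# Array eps: per-pixel budgets (axis-aligned ellipsoid), larger where the
# input is brighter, with a matching per-pixel step size.
zeta = 0.2 * np.abs(x) + 0.01
alpha = zeta / 40
adv_ellipsoid = projected_gradient_descent(model_fn, x, zeta, alpha, 40, np.inf)
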
def main(_):
    rng = random.PRNGKey(0)

    # Load MNIST dataset
    train_images, train_labels, test_images, test_labels = datasets.mnist()

    batch_size = 128
    batch_shape = (-1, 28, 28, 1)
    num_train = train_images.shape[0]
    num_complete_batches, leftover = divmod(num_train, batch_size)
    num_batches = num_complete_batches + bool(leftover)

    train_images = np.reshape(train_images, batch_shape)
    test_images = np.reshape(test_images, batch_shape)

    def data_stream():
        rng = npr.RandomState(0)
        while True:
            perm = rng.permutation(num_train)
            for i in range(num_batches):
                batch_idx = perm[i * batch_size:(i + 1) * batch_size]
                yield train_images[batch_idx], train_labels[batch_idx]

    def save(fn, opt_state):
        params = deepcopy(get_params(opt_state))
        save_dict = {}
        for idx, p in enumerate(params):
            if (p != ()):
                pp = (p[0].tolist(), p[1].tolist())
                params[idx] = pp
        save_dict["params"] = params
        with open(fn, "w") as f:
            json.dump(save_dict, f)

    def load(fn):
        with open(fn, "r") as f:
            params = json.load(f)
        params = params["params"]
        for idx, p in enumerate(params):
            if (p != []):
                pp = (np.array(p[0]), np.array(p[1]))
                params[idx] = pp
            else:
                params[idx] = ()
        return opt_init(params)

    batches = data_stream()

    # Model, loss, and accuracy functions
    init_random_params, predict = stax.serial(
        stax.Conv(32, (8, 8), strides=(2, 2), padding="SAME"),
        stax.Relu,
        stax.Conv(128, (6, 6), strides=(2, 2), padding="VALID"),
        stax.Relu,
        stax.Conv(128, (5, 5), strides=(1, 1), padding="VALID"),
        stax.Flatten,
        stax.Dense(128),
        stax.Relu,
        stax.Dense(10),
    )

    def loss(params, batch):
        inputs, targets = batch
        preds = predict(params, inputs)
        return -np.mean(logsoftmax(preds) * targets)

    def accuracy(params, batch):
        inputs, targets = batch
        target_class = np.argmax(targets, axis=1)
        predicted_class = np.argmax(predict(params, inputs), axis=1)
        return np.mean(predicted_class == target_class)

    def gen_ellipsoid(X, zeta_rel, zeta_const, alpha, N_steps):
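        # Per-pixel L-inf budget zeta = |X| * zeta_rel + zeta_const, so brighter
        # pixels get proportionally larger budgets; by default each of the
        # N_steps iterations takes a step of alpha = zeta / N_steps.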
        zeta = (np.abs(X).T * zeta_rel).T + zeta_const
        if (alpha is None):
            alpha = 1 / N_steps * zeta
        else:
            assert isinstance(alpha, float), "Alpha must be float"
            alpha = alpha * np.ones_like(X)
        return zeta, alpha

    def gen_ellipsoid_match_volume(X, zeta_const, eps, alpha, N_steps):
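        # Pick zeta_rel per example so the summed per-pixel budget matches an
        # L-inf ball of radius eps: sum(zeta) ~= N * eps, which gives
        # zeta_rel = N * (eps - zeta_const) / ||X||_1.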
        x_norms = np.linalg.norm(np.reshape(X, (X.shape[0], -1)),
                                 ord=1,
                                 axis=1)
        N = np.prod(X.shape[1:])
        zeta_rel = N * (eps - zeta_const) / x_norms
        assert (zeta_rel <= 1.0).all(), \
            "zeta_rel cannot be larger than 1. Please increase zeta_const or reduce eps"
        zeta_rel = np.clip(zeta_rel, 0.0, 1.0)
        return gen_ellipsoid(X, zeta_rel, zeta_const, alpha, N_steps)

    # Instantiate an optimizer
    opt_init, opt_update, get_params = optimizers.adam(0.001)

    @jit
    def update(i, opt_state, batch):
        params = get_params(opt_state)
        return opt_update(i, grad(loss)(params, batch), opt_state)

    # Initialize model
    _, init_params = init_random_params(rng, batch_shape)
    opt_state = opt_init(init_params)
    itercount = itertools.count()

    try:
        opt_state = load("tutorials/jax/test_model.json")
    except (IOError, ValueError):
        # No usable saved model; train for one epoch and save the result.
        print("\nStarting training...")
        start_time = time.time()
        for _ in range(num_batches):
            opt_state = update(next(itercount), opt_state, next(batches))
        epoch_time = time.time() - start_time
        print("Training took {:0.2f} sec".format(epoch_time))
        save("tutorials/jax/test_model.json", opt_state)

    # Evaluate model on clean data
    params = get_params(opt_state)

    # Evaluate model on adversarial data
    model_fn = lambda images: predict(params, images)
    # Generate single attacking test image
    idx = 0
    plt.figure(figsize=(15, 6), constrained_layout=True)

    zeta, alpha = gen_ellipsoid(X=test_images[idx].reshape((1, 28, 28, 1)),
                                zeta_rel=FLAGS.zeta_rel,
                                zeta_const=FLAGS.zeta_const,
                                alpha=None,
                                N_steps=40)
    # zeta, alpha = gen_ellipsoid_match_volume(X=test_images[idx].reshape((1,28,28,1)), zeta_const=FLAGS.zeta_const, eps=FLAGS.eps, alpha=None, N_steps=40)
    test_images_pgd_ellipsoid = projected_gradient_descent(
        model_fn, test_images[idx].reshape((1, 28, 28, 1)), zeta, alpha, 40,
        np.inf)
    predict_pgd_ellipsoid = np.argmax(predict(params,
                                              test_images_pgd_ellipsoid),
                                      axis=1)

    test_images_fgm = fast_gradient_method(
        model_fn, test_images[idx].reshape((1, 28, 28, 1)), 0.075, np.inf)
    predict_fgm = np.argmax(predict(params, test_images_fgm), axis=1)

    test_images_pgd = projected_gradient_descent(
        model_fn, test_images[idx].reshape((1, 28, 28, 1)), FLAGS.eps, 0.01,
        40, 2)
    predict_pgd = np.argmax(predict(params, test_images_pgd), axis=1)

    base = 100
    f_ = lambda x: np.log(x) / np.log(base)
    a = base - 1
    transform = 1 + a * test_images[idx].reshape((1, 28, 28, 1))  # [1,base]
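    # Pixels in [0, 1] are mapped to [1, base] and then back to [0, 1] via
    # log_base; the attack runs in this log domain and the result is inverted
    # below with (base**x - 1) / (base - 1).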

    # test_images_pgd_transform = projected_gradient_descent(model_fn, f_(np.where(transform > base,base,transform)), FLAGS.zeta_rel, 0.01, 40, np.inf)
    test_images_pgd_transform = projected_gradient_descent(
        model_fn, f_(np.where(transform > base, base, transform)), 1.8, 0.01,
        40, 2)
    test_images_pgd_transform = np.clip(test_images_pgd_transform, 0.0, 1.0)
    test_images_pgd_transform = (base**test_images_pgd_transform - 1) / a
    predict_transform = np.argmax(predict(params, test_images_pgd_transform),
                                  axis=1)

    plt.subplot(151)
    plt.imshow(np.squeeze(test_images[idx]), cmap='gray')
    plt.title("Original")
    plt.subplot(152)
    plt.imshow(np.squeeze(test_images_fgm), cmap='gray')
    plt.title(f"FGM L-Inf Pred: {predict_fgm}")
    plt.subplot(153)
    plt.imshow(np.squeeze(test_images_pgd), cmap='gray')
    plt.title(f"PGD L2 {predict_pgd}")
    plt.subplot(154)
    plt.imshow(np.squeeze(test_images_pgd_ellipsoid), cmap='gray')
    plt.title(f"PGD Ellipsoid L-Inf Pred: {predict_pgd_ellipsoid}")
    plt.subplot(155)
    plt.imshow(np.squeeze(test_images_pgd_transform), cmap='gray')
    plt.title(f"PGD log{base} L2 Pred: {predict_transform}")

    plt.show()

    transform = 1 + a * test_images
    test_images_pgd_transform = projected_gradient_descent(
        model_fn, f_(np.where(transform > base, base, transform)),
        FLAGS.zeta_rel, 0.01, 40, np.inf)
    test_images_pgd_transform = np.clip(test_images_pgd_transform, 0.0, 1.0)
    test_images_pgd_transform = (base**test_images_pgd_transform - 1) / a
    test_acc_pgd_transform = accuracy(params,
                                      (test_images_pgd_transform, test_labels))

    # Generate whole attacking test images
    # zeta, alpha = gen_ellipsoid(X=test_images, zeta_rel=FLAGS.zeta_rel, zeta_const=FLAGS.zeta_const, alpha=None, N_steps=40)
    zeta, alpha = gen_ellipsoid_match_volume(X=test_images,
                                             zeta_const=FLAGS.zeta_const,
                                             eps=FLAGS.eps,
                                             alpha=None,
                                             N_steps=40)
    test_images_pgd_ellipsoid = projected_gradient_descent(
        model_fn, test_images, zeta, alpha, 40, np.inf)
    test_acc_pgd_ellipsoid = accuracy(params,
                                      (test_images_pgd_ellipsoid, test_labels))

    test_images_fgm = fast_gradient_method(model_fn, test_images, FLAGS.eps,
                                           np.inf)
    test_images_pgd = projected_gradient_descent(model_fn, test_images,
                                                 FLAGS.eps, 0.01, 40, np.inf)

    test_acc_fgm = accuracy(params, (test_images_fgm, test_labels))
    test_acc_pgd = accuracy(params, (test_images_pgd, test_labels))

    train_acc = accuracy(params, (train_images, train_labels))
    test_acc = accuracy(params, (test_images, test_labels))

    print("Training set accuracy: {}".format(train_acc))
    print("Test set accuracy on clean examples: {}".format(test_acc))
    print("Test set accuracy on FGM adversarial examples: {}".format(
        test_acc_fgm))
    print("Test set accuracy on PGD adversarial examples: {}".format(
        test_acc_pgd))
    print("Test set accuracy on PGD Ellipsoid adversarial examples: {}".format(
        test_acc_pgd_ellipsoid))
    print(
        "Test set accuracy on PGD Ellipsoid via transform adversarial examples: {}"
        .format(test_acc_pgd_transform))
Example #5
x_train_n, y_train_n, x_test_n, y_test_n = datasets.get_dataset(
    "fashion_mnist", n_train, n_test, noise=True)

# Constructing Kernels
print("=> Computing NTK for train and test")
with print_time():
    model = kernel_fit(x_train, y_train)

# Evaluating on test set
print(accuracy(model(x_test), y_test, topk=(1, 5)))

print("=> Running high frequency FGM attack against resulting NTK")
with print_time():
    x_test_hf_fgm = fast_gradient_method(
        model_highfreq_transformed(model),
        do_highfreq_transform(x_test),
        0.3,
        np.inf,
    )
print(accuracy(model(x_test_hf_fgm), y_test, topk=(1, 5)))

print("=> Running low frequency FGM attack against resulting NTK")
with print_time():
    x_test_lf_fgm = fast_gradient_method(
        model_lowfreq_transformed(model),
        do_lowfreq_transform(x_test),
        0.3,
        np.inf,
    )
print(accuracy(model(x_test_lf_fgm), y_test, topk=(1, 5)))

print("=> Running FGM attack against resulting NTK")
print("="*80)
x_train, y_train, x_test, y_test = datasets.get_dataset("fashion_mnist", 1024, 128, perturb=True)

validate(
    val_loader=datasets.minibatch(
        x_test, y_test, batch_size=128, train_epochs=1, key=None
    ),
    model=apply_fn,
    params=params,
    criterion=criterion,
    epoch=20,
    batch_size=128,
    num_images=len(x_test),
)

x_train, y_train, x_test, y_test = datasets.get_dataset("fashion_mnist", 1024, 128)

print("=> Running FGM attack against resulting NTK")
now = time.time()
x_test_fgm = fast_gradient_method(model, x_test, 0.3, np.inf)
y_test_fgm = model(x_test_fgm)
print(f"Took {time.time() - now:0.2f}s")
print(accuracy(y_test_fgm, y_test, topk=(1, 5)))


print("=> Running PGD attack against resulting NTK")
now = time.time()
x_test_pgd = projected_gradient_descent(model, x_test, 0.3, 0.01, 40, np.inf)
y_test_pgd = model(x_test_pgd)
print(f"Took {time.time() - now:0.2f}s")
print(accuracy(y_test_pgd, y_test, topk=(1, 5)))