def clip_eta(eta, ord, eps):
    """
    Helper function to clip the perturbation to epsilon norm ball.
    :param eta: A tensor with the current perturbation.
    :param ord: Order of the norm (mimics Numpy).
                Possible values: np.inf, 1 or 2.
    :param eps: Epsilon, bound of the perturbation.
    """

    # Clipping perturbation eta to ord norm ball
    if ord not in [np.inf, 1, 2]:
        raise ValueError('ord must be np.inf, 1, or 2.')
    reduc_ind = list(range(1, len(eta.get_shape())))
    avoid_zero_div = 1e-12
    if ord == np.inf:
        eta = clip_by_value(eta, -eps, eps)
    elif ord == 1:
        # Implements a projection algorithm onto the l1-ball from
        # (Duchi et al. 2008) that runs in time O(d*log(d)) where d is the
        # input dimension.
        # Paper link (Duchi et al. 2008): https://dl.acm.org/citation.cfm?id=1390191
        eps = tf.cast(eps, eta.dtype)

        dim = tf.reduce_prod(tf.shape(eta)[1:])
        eta_flat = tf.reshape(eta, (-1, dim))
        abs_eta = tf.abs(eta_flat)

        if 'sort' in dir(tf):
            mu = -tf.sort(-abs_eta, axis=-1)
        else:
            # `tf.sort` is only available in TF 1.13 onwards
            mu = tf.nn.top_k(abs_eta, k=dim, sorted=True)[0]

        cumsums = tf.cumsum(mu, axis=-1)
        js = tf.cast(tf.divide(1, tf.range(1, dim + 1)), eta.dtype)
        t = tf.cast(tf.greater(mu - js * (cumsums - eps), 0), eta.dtype)

        rho = tf.argmax(t * cumsums, axis=-1)
        rho_val = tf.reduce_max(t * cumsums, axis=-1)
        theta = tf.divide(rho_val - eps, tf.cast(1 + rho, eta.dtype))

        eta_sgn = tf.sign(eta_flat)
        eta_proj = eta_sgn * tf.maximum(abs_eta - theta[:, tf.newaxis], 0)
        eta_proj = tf.reshape(eta_proj, tf.shape(eta))

        norm = tf.reduce_sum(tf.abs(eta), reduc_ind)
        eta = tf.where(tf.greater(norm, eps), eta_proj, eta)
    elif ord == 2:
        # avoid_zero_div must go inside sqrt to avoid a divide by zero
        # in the gradient through this operation
        norm = tf.sqrt(tf.maximum(avoid_zero_div,
                                  reduce_sum(tf.square(eta),
                                             reduc_ind,
                                             keepdims=True)))
        # We must *clip* to within the norm ball, not *normalize* onto the
        # surface of the ball
        factor = tf.minimum(1., div(eps, norm))
        eta = eta * factor

    return eta
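# A minimal NumPy reference sketch of the projection above (illustrative
# only, not part of the original module). It mirrors the np.inf and 2 norm
# branches so the graph logic can be sanity-checked eagerly.
import numpy as np

def clip_eta_np(eta, ord, eps, avoid_zero_div=1e-12):
    """NumPy sketch of clip_eta for ord in {np.inf, 2}."""
    if ord == np.inf:
        return np.clip(eta, -eps, eps)
    if ord == 2:
        reduc_ind = tuple(range(1, eta.ndim))
        norm = np.sqrt(np.maximum(avoid_zero_div,
                                  np.sum(eta ** 2, axis=reduc_ind,
                                         keepdims=True)))
        # Clip into the ball rather than normalizing onto its surface
        return eta * np.minimum(1., eps / norm)
    raise NotImplementedError("sketch covers np.inf and 2 only")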
def kl_with_logits(p_logits, q_logits, scope=None,
                   loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES):
    """Helper function to compute kl-divergence KL(p || q)"""
    with tf.name_scope(scope, "kl_divergence") as name:
        p = tf.nn.softmax(p_logits)
        p_log = tf.nn.log_softmax(p_logits)
        q_log = tf.nn.log_softmax(q_logits)
        loss = reduce_mean(reduce_sum(p * (p_log - q_log), axis=1),
                           name=name)
        tf.losses.add_loss(loss, loss_collection)
        return loss
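# NumPy reference sketch of KL(p || q) from logits (illustrative only),
# using the same log-softmax formulation as the graph version above.
import numpy as np

def kl_with_logits_np(p_logits, q_logits):
    def log_softmax(z):
        z = z - z.max(axis=1, keepdims=True)  # shift for numerical stability
        return z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    p_log = log_softmax(p_logits)
    q_log = log_softmax(q_logits)
    p = np.exp(p_log)
    return np.mean(np.sum(p * (p_log - q_log), axis=1))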
def __init__(self, sess, x, logits, targeted_label, targeted_attack,
             binary_search_steps, max_iterations, initial_const,
             clip_min, clip_max, nb_classes, batch_size):
    self.sess = sess
    self.x = x
    self.logits = logits
    assert logits.op.type != 'Softmax'
    self.targeted_label = targeted_label
    self.targeted_attack = targeted_attack
    self.binary_search_steps = binary_search_steps
    self.max_iterations = max_iterations
    self.initial_const = initial_const
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.batch_size = batch_size

    self.repeat = self.binary_search_steps >= 10
    self.shape = tuple([self.batch_size] +
                       list(self.x.get_shape().as_list()[1:]))
    self.ori_img = tf.Variable(np.zeros(self.shape), dtype=tf_dtype,
                               name='ori_img')
    self.const = tf.Variable(np.zeros(self.batch_size), dtype=tf_dtype,
                             name='const')

    self.score = softmax_cross_entropy_with_logits(
        labels=self.targeted_label, logits=self.logits)
    self.l2dist = reduce_sum(tf.square(self.x - self.ori_img))
    # A small self.const results in a small adversarial perturbation.
    # A targeted attack minimizes the loss w.r.t. the target label;
    # an untargeted attack maximizes the loss w.r.t. the true label.
    if self.targeted_attack:
        self.loss = reduce_sum(self.score * self.const) + self.l2dist
    else:
        self.loss = -reduce_sum(self.score * self.const) + self.l2dist
    self.grad, = tf.gradients(self.loss, self.x)
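# NumPy sketch of the objective assembled above (illustrative only):
# a targeted attack minimizes const * CE(target, logits) + ||x - x_ori||_2^2,
# while an untargeted attack flips the sign of the cross-entropy term.
import numpy as np

def attack_loss_np(logits, label_onehot, x, x_ori, const, targeted):
    z = logits - logits.max(axis=1, keepdims=True)  # stable log-softmax
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    score = -(label_onehot * log_probs).sum(axis=1)  # per-example CE
    l2dist = np.sum((x - x_ori) ** 2)
    sign = 1.0 if targeted else -1.0
    return sign * np.sum(score * const) + l2dist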
def l2_batch_normalize(x, epsilon=1e-12, scope=None):
    """
    Helper function to normalize a batch of vectors.
    :param x: the input placeholder
    :param epsilon: stabilizes division
    :return: the batch of l2 normalized vectors
    """
    with tf.name_scope(scope, "l2_batch_normalize") as name_scope:
        x_shape = tf.shape(x)
        x = tf.contrib.layers.flatten(x)
        x /= (epsilon + reduce_max(tf.abs(x), 1, keepdims=True))
        square_sum = reduce_sum(tf.square(x), 1, keepdims=True)
        x_inv_norm = tf.rsqrt(np.sqrt(epsilon) + square_sum)
        x_norm = tf.multiply(x, x_inv_norm)
        return tf.reshape(x_norm, x_shape, name_scope)
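# NumPy reference sketch of the same normalization (illustrative only):
# pre-scaling by the max absolute value keeps the squared sum from
# overflowing before the final rsqrt rescaling.
import numpy as np

def l2_batch_normalize_np(x, epsilon=1e-12):
    x_shape = x.shape
    x = x.reshape(x.shape[0], -1)
    x = x / (epsilon + np.abs(x).max(axis=1, keepdims=True))
    square_sum = np.sum(x ** 2, axis=1, keepdims=True)
    x_norm = x / np.sqrt(np.sqrt(epsilon) + square_sum)
    return x_norm.reshape(x_shape)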
def optimize_linear(grad, eps, ord=np.inf):
    """
    Solves for the optimal input to a linear function under a norm constraint.

    Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

    :param grad: tf tensor containing a batch of gradients
    :param eps: float scalar specifying size of constraint region
    :param ord: int specifying order of norm
    :returns: tf tensor containing optimal perturbation
    """

    # In Python 2, the `list` call in the following line is redundant / harmless.
    # In Python 3, the `list` call is needed to convert the iterator returned
    # by `range` into a list.
    red_ind = list(range(1, len(grad.get_shape())))
    avoid_zero_div = 1e-12
    if ord == np.inf:
        # Take sign of gradient
        optimal_perturbation = tf.sign(grad)
        # The following line should not change the numerical results.
        # It applies only because `optimal_perturbation` is the output of
        # a `sign` op, which has zero derivative anyway.
        # It should not be applied for the other norms, where the
        # perturbation has a non-zero derivative.
        optimal_perturbation = tf.stop_gradient(optimal_perturbation)
    elif ord == 1:
        abs_grad = tf.abs(grad)
        sign = tf.sign(grad)
        max_abs_grad = tf.reduce_max(abs_grad, red_ind, keepdims=True)
        tied_for_max = tf.to_float(tf.equal(abs_grad, max_abs_grad))
        num_ties = tf.reduce_sum(tied_for_max, red_ind, keepdims=True)
        optimal_perturbation = sign * tied_for_max / num_ties
    elif ord == 2:
        square = tf.maximum(avoid_zero_div,
                            reduce_sum(tf.square(grad),
                                       reduction_indices=red_ind,
                                       keepdims=True))
        optimal_perturbation = grad / tf.sqrt(square)
    else:
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Scale perturbation to be the solution for the norm=eps rather than
    # norm=1 problem
    scaled_perturbation = mul(eps, optimal_perturbation)
    return scaled_perturbation
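# NumPy sketch of the same closed-form solutions (illustrative only):
# L-inf takes eps * sign(g) everywhere, L2 rescales g to length eps, and L1
# concentrates the whole eps budget on the largest-magnitude coordinate(s),
# splitting ties evenly, exactly as in the graph version above.
import numpy as np

def optimize_linear_np(grad, eps, ord=np.inf, avoid_zero_div=1e-12):
    red_ind = tuple(range(1, grad.ndim))
    if ord == np.inf:
        opt = np.sign(grad)
    elif ord == 1:
        abs_grad = np.abs(grad)
        max_abs = abs_grad.max(axis=red_ind, keepdims=True)
        tied = (abs_grad == max_abs).astype(grad.dtype)
        opt = np.sign(grad) * tied / tied.sum(axis=red_ind, keepdims=True)
    elif ord == 2:
        square = np.maximum(avoid_zero_div,
                            np.sum(grad ** 2, axis=red_ind, keepdims=True))
        opt = grad / np.sqrt(square)
    else:
        raise NotImplementedError
    return eps * opt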
def fgsm_generate(x, model, y=None, eps=0.3, ord=np.inf, clip_min=None,
                  clip_max=None, clip_grad=False, targeted=False,
                  sanity_checks=True):
    """
    Generate FGSM adversarial examples, with alternative CW and DLR losses
    left commented out below.
    :param x: the input placeholder
    :param model: a callable returning the model's post-softmax output; the
                  pre-softmax logits are recovered from its op below
    :param y: (optional) placeholder for the true labels, or for the target
              labels if targeted is True
    :param eps: the epsilon (input variation parameter)
    :param ord: (optional) Order of the norm (mimics Numpy).
                Possible values: np.inf, 1 or 2.
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components
    :param clip_grad: (optional bool) zero out gradient components where the
                      input is already at the boundary of the domain
    :param targeted: (optional bool) is the attack targeted or untargeted?
    :param sanity_checks: bool, if True, include asserts
    :return: a tensor for the adversarial example
    """
    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(tf.assert_greater_equal(x, tf.cast(clip_min, x.dtype)))
    if clip_max is not None:
        asserts.append(tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

    # Recover the pre-softmax logits from the model's output op
    logits = model(x)._op.inputs[0]

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.to_float(tf.equal(logits, preds_max))
        y = tf.stop_gradient(y)
    y = y / reduce_sum(y, 1, keepdims=True)

    # Compute loss
    #################
    ###  CE-loss  ###
    #################
    loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # ##################
    # ###  CW-loss   ###
    # ##################
    # logits_sort = tf.contrib.framework.sort(logits, axis=1,
    #                                         direction="DESCENDING")
    # logits_max = tf.gather(logits_sort, axis=1, indices=[0])
    # logits_secondmax = tf.gather(logits_sort, axis=1, indices=[1])
    #
    # logits_loss = logits_max - logits_secondmax
    # loss = -tf.reduce_mean(logits_loss)
    # if targeted:
    #     loss = -loss

    # ##################
    # ###  DLR-loss  ###
    # ##################
    # logits_sort = tf.contrib.framework.sort(logits, axis=1,
    #                                         direction="DESCENDING")
    # logits_max = tf.gather(logits_sort, axis=1, indices=[0])
    # logits_secondmax = tf.gather(logits_sort, axis=1, indices=[1])
    # logits_thirdmax = tf.gather(logits_sort, axis=1, indices=[2])
    #
    # logits_loss = tf.divide(logits_max - logits_secondmax,
    #                         logits_max - logits_thirdmax + 1e-12)
    #
    # loss = -tf.reduce_mean(logits_loss)
    # if targeted:
    #     loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    if clip_grad:
        grad = zero_out_clipped_grads(grad, x, clip_min, clip_max)

    optimal_perturbation = optimize_linear(grad, eps, ord)

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + optimal_perturbation

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
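# A minimal NumPy sketch of the single FGSM step the graph above performs
# under the L-inf norm (illustrative only): move eps along the gradient
# sign, then clip back into the data range.
import numpy as np

def fgsm_step_np(x, grad, eps, clip_min=None, clip_max=None, targeted=False):
    if targeted:
        grad = -grad  # descend on the loss toward the target label
    adv_x = x + eps * np.sign(grad)
    if clip_min is not None or clip_max is not None:
        # One-sided clipping is not supported, mirroring the graph version
        assert clip_min is not None and clip_max is not None
        adv_x = np.clip(adv_x, clip_min, clip_max)
    return adv_x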