def optimize_linear(grad, eps, ord=np.inf):
    """
    Solves for the optimal input to a linear function under a norm constraint.

    Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

    NOTE(review): another function named `optimize_linear` is defined later
    in this file; if both live at module level, the later definition is the
    one callers actually get. Confirm which one is intended.

    :param grad: tf tensor containing a batch of gradients
    :param eps: float scalar specifying size of constraint region
    :param ord: int specifying order of norm (np.inf, 1 or 2)
    :returns: tf tensor containing optimal perturbation
    :raises NotImplementedError: if `ord` is not np.inf, 1 or 2
    """
    # Reject unsupported norms before any graph op is created.
    if ord not in (np.inf, 1, 2):
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Reduce over every axis except the leading (batch) axis.
    red_ind = list(range(1, len(grad.get_shape())))

    if ord == np.inf:
        # The sign op has zero derivative anyway; stop_gradient makes that
        # explicit and is applied only for this norm.
        optimal_perturbation = tf.stop_gradient(tf.sign(grad))
    elif ord == 1:
        abs_grad = tf.abs(grad)
        sign = tf.sign(grad)
        max_abs_grad = tf.reduce_max(abs_grad, axis=red_ind, keepdims=True)
        # Cast the tie mask to the gradient's own dtype so the product with
        # `sign` below is well-typed for any floating-point gradient.
        tied_for_max = tf.cast(tf.equal(abs_grad, max_abs_grad), grad.dtype)
        num_ties = tf.reduce_sum(tied_for_max, axis=red_ind, keepdims=True)
        # Split the unit budget evenly between the components tied for max.
        optimal_perturbation = sign * tied_for_max / num_ties
    else:
        # ord == 2: normalise by the L2 norm, floored to avoid dividing by 0.
        avoid_zero_div = 1e-12
        square = tf.maximum(
            avoid_zero_div,
            tf.reduce_sum(tf.square(grad), axis=red_ind, keepdims=True))
        optimal_perturbation = grad / tf.sqrt(square)

    # Scale from the norm=1 solution to the norm=eps solution.
    scaled_perturbation = utils_tf.mul(eps, optimal_perturbation)
    return scaled_perturbation
def optimize_linear(grad, eps, ord=np.inf):
    """
    Builds a perturbation from the rounded, clipped gradient.

    The gradient is rounded element-wise with `tf.round`, clipped to the
    range [-2, 2] and then scaled by `eps`, so individual components of the
    result can have magnitude up to 2 * eps.

    NOTE(review): this re-uses the name of an earlier `optimize_linear` in
    this file; if both live at module level, this definition replaces the
    earlier one. Confirm that is intended.

    :param grad: tf tensor containing gradients
    :param eps: scalar the rounded and clipped gradient is scaled by
    :param ord: order of the norm; only np.inf is accepted
    :returns: tf tensor containing the scaled perturbation
    :raises NotImplementedError: if `ord` is not np.inf
    """
    if ord != np.inf:
        raise NotImplementedError(
            "Only the L-inf norm is currently implemented.")

    optimal_perturbation = tf.clip_by_value(tf.round(grad), -2, 2)
    return utils_tf.mul(eps, optimal_perturbation)
def optimize_linear_pos(grad, eps, ord=np.inf, pert_type='all'):
    """
    Solves for the optimal input to a linear function under a norm constraint.

    Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

    :param grad: tf tensor containing a batch of gradients
    :param eps: float scalar specifying size of constraint region
    :param ord: int specifying order of norm (np.inf, 1 or 2)
    :param pert_type: 'all', 'pos' or 'neg'. Only consulted when `ord` is
                      np.inf: 'all' keeps sign(grad) unchanged, 'pos' applies
                      an element-wise minimum with 0, 'neg' applies an
                      element-wise maximum with 0.
    :returns: tf tensor containing optimal perturbation
    :raises NotImplementedError: if `ord` is not np.inf, 1 or 2
    :raises ValueError: if `ord` is np.inf and `pert_type` is not one of
                        'all', 'pos' or 'neg'
    """
    # Validate the arguments up front so bad input fails before any op is
    # added to the graph.
    if ord not in (np.inf, 1, 2):
        raise NotImplementedError(
            "Only L-inf, L1 and L2 norms are currently implemented.")
    if ord == np.inf and pert_type not in ('all', 'pos', 'neg'):
        raise ValueError("pert_type must be 'all', 'pos' or 'neg'")

    # `list` is needed in Python 3, where `range` returns a lazy object.
    # Reduce over every axis except the leading (batch) axis.
    red_ind = list(range(1, len(grad.get_shape())))

    if ord == np.inf:
        # Take sign of gradient
        optimal_perturbation = tf.sign(grad)
        # NOTE(review): 'pos' keeps only the entries <= 0 and 'neg' only the
        # entries >= 0. Presumably the labels describe the noise after the
        # caller applies (e.g. subtracts) the perturbation -- confirm.
        if pert_type == 'pos':
            optimal_perturbation = tf.minimum(optimal_perturbation, 0)
        elif pert_type == 'neg':
            optimal_perturbation = tf.maximum(optimal_perturbation, 0)
        # The following line should not change the numerical results: the
        # perturbation comes from a `sign` op, which has zero derivative
        # anyway. It is not applied for the other norms, where the
        # perturbation has a non-zero derivative.
        optimal_perturbation = tf.stop_gradient(optimal_perturbation)
    elif ord == 1:
        abs_grad = tf.abs(grad)
        sign = tf.sign(grad)
        max_abs_grad = tf.reduce_max(abs_grad, axis=red_ind, keepdims=True)
        # Cast the tie mask to the gradient's own dtype so the product with
        # `sign` below is well-typed for any floating-point gradient.
        tied_for_max = tf.cast(tf.equal(abs_grad, max_abs_grad), grad.dtype)
        num_ties = tf.reduce_sum(tied_for_max, axis=red_ind, keepdims=True)
        optimal_perturbation = sign * tied_for_max / num_ties
    else:
        # ord == 2: normalise by the L2 norm, floored to avoid dividing by 0.
        avoid_zero_div = 1e-12
        square = tf.maximum(
            avoid_zero_div,
            tf.reduce_sum(tf.square(grad), axis=red_ind, keepdims=True))
        optimal_perturbation = grad / tf.sqrt(square)

    # Scale perturbation to be the solution for the norm=eps rather than
    # norm=1 problem
    scaled_perturbation = utils_tf.mul(eps, optimal_perturbation)
    return scaled_perturbation
def sparse_l1_descent(x, logits, y=None, eps=1.0, q=99, clip_min=None,
                      clip_max=None, clip_grad=False, targeted=False,
                      sanity_checks=True):
    """
    TensorFlow implementation of the Sparse L1 Descent Method.

    Only the gradient components whose magnitude reaches the q-th percentile
    (per example) are kept; the step of size `eps` is split evenly between
    them.

    :param x: the input placeholder
    :param logits: output of model.get_logits
    :param y: (optional) A placeholder for the true labels. If targeted
              is true, then provide the target label. Otherwise, only provide
              this parameter if you'd like to use true labels when crafting
              adversarial samples. Otherwise, model predictions are used as
              labels to avoid the "label leaking" effect (explained in this
              paper: https://arxiv.org/abs/1611.01236). Default is None.
              Labels should be one-hot-encoded.
    :param eps: the epsilon (input variation parameter)
    :param q: the percentile above which gradient values are retained. Either
              a scalar or a vector of same length as the input batch
              dimension.
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components.
              One-sided clipping is not supported: give both bounds or
              neither.
    :param clip_grad: (optional bool) Ignore gradient components
              at positions where the input is already at the boundary
              of the domain, and the update step will get clipped out.
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
              default, will try to make the label incorrect. Targeted
              will instead try to move in the direction of being more
              like y.
    :param sanity_checks: (optional bool) If True, the range assertions on
              `x` are attached as control dependencies of the result.
    :return: a tensor for the adversarial example
    """
    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(
            utils_tf.assert_greater_equal(x, tf.cast(clip_min, x.dtype)))

    if clip_max is not None:
        asserts.append(
            utils_tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

    # Make sure the caller has not passed probs by accident
    assert logits.op.type != 'Softmax'

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.to_float(tf.equal(logits, preds_max))
        y = tf.stop_gradient(y)
    # Normalise the labels so each row sums to one
    y = y / reduce_sum(y, 1, keepdims=True)

    # Compute loss
    loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    if clip_grad:
        grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

    red_ind = list(range(1, len(grad.get_shape())))
    # Number of components per example (product of the non-batch dimensions)
    dim = tf.reduce_prod(tf.shape(x)[1:])

    abs_grad = tf.reshape(tf.abs(grad), (-1, dim))

    # if q is a scalar, broadcast it to a vector of same length as the batch
    # dim
    q = tf.cast(tf.broadcast_to(q, tf.shape(x)[0:1]), tf.float32)
    k = tf.cast(tf.floor(q / 100 * tf.cast(dim, tf.float32)), tf.int32)
    # q == 100 gives k == dim (one past the last sorted column) and q < 0
    # gives a negative k; clamp so the gather below stays in bounds. With
    # q == 100 only the component(s) of largest magnitude are retained.
    k = tf.maximum(tf.minimum(k, dim - 1), 0)

    # `tf.sort` is much faster than `tf.contrib.distributions.percentile`.
    # For TF <= 1.12, use `tf.nn.top_k` as `tf.sort` is not implemented.
    if LooseVersion(tf.__version__) <= LooseVersion('1.12.0'):
        # `tf.sort` is only available in TF 1.13 onwards.
        # Negating twice turns top_k's descending order into ascending.
        sorted_grad = -tf.nn.top_k(-abs_grad, k=dim, sorted=True)[0]
    else:
        sorted_grad = tf.sort(abs_grad, axis=-1)

    # Pick, for every example i, the k[i]-th smallest gradient magnitude
    idx = tf.stack((tf.range(tf.shape(abs_grad)[0]), k), -1)
    percentiles = tf.gather_nd(sorted_grad, idx)
    tied_for_max = tf.greater_equal(abs_grad, tf.expand_dims(percentiles, -1))
    tied_for_max = tf.reshape(tf.cast(tied_for_max, x.dtype), tf.shape(grad))
    num_ties = tf.reduce_sum(tied_for_max, red_ind, keepdims=True)

    # Split the unit budget evenly between the retained components
    optimal_perturbation = tf.sign(grad) * tied_for_max / num_ties

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + utils_tf.mul(eps, optimal_perturbation)

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
# Normalize current gradient and add it to the accumulated gradient red_ind = list(range(1, len(grad.get_shape()))) avoid_zero_div = tf.cast(1e-12, grad.dtype) divisor = tf.reduce_mean(tf.abs(grad), red_ind, keepdims=True) norm_grad = grad / tf.maximum(avoid_zero_div, divisor) m = tf.placeholder(tf.float32, shape=inputs.get_shape().as_list(), name="momentum") acc_m = m + norm_grad grad = acc_m # ord=np.inf optimal_perturbation = tf.sign(grad) optimal_perturbation = tf.stop_gradient(optimal_perturbation) scaled_perturbation_inf = utils_tf.mul(0.01, optimal_perturbation) # ord=2 square = tf.maximum( 1e-12, tf.reduce_sum(tf.square(grad), axis=red_ind, keepdims=True)) optimal_perturbation = grad / tf.sqrt(square) scaled_perturbation_2 = utils_tf.mul(0.01, optimal_perturbation) def attack(input_img, len_x, target_txt, pert_type='2'): target_index_list = [ np.asarray([c for c in encode(t)]) for t in target_txt ] with graph.as_default(): adv_img = input_img.copy() m0 = np.zeros(input_img.shape) record_iter = np.zeros(input_img.shape[0]) # 0代表没成功