def clip_eta(eta, ord, eps):
    """
    Helper function to clip the perturbation to epsilon norm ball.

    :param eta: A tensor with the current perturbation.
    :param ord: Order of the norm (mimics Numpy).
                Possible values: np.inf, 1 or 2.
    :param eps: Epsilon, bound of the perturbation.
    :return: `eta` clipped element-wise to [-eps, eps] for np.inf, or
             rescaled by min(1, eps / norm) for ord 1 and 2, where the
             norm is reduced over every axis except axis 0.
    :raises ValueError: if `ord` is not np.inf, 1 or 2.
    """
    if ord not in [np.inf, 1, 2]:
        raise ValueError('ord must be np.inf, 1, or 2.')

    if ord == np.inf:
        # The max-norm ball is a box, so each coordinate is clipped on its
        # own; no reduction axes (and hence no static rank) are required.
        return tf.clip_by_value(eta, -eps, eps)

    # Built-in `range` works on both Python 2 and 3, unlike `xrange`.
    reduc_ind = list(range(1, len(eta.get_shape())))
    avoid_zero_div = 1e-12
    if ord == 1:
        norm = tf.maximum(
            avoid_zero_div,
            reduce_sum(tf.abs(eta), reduc_ind, keepdims=True))
    else:
        # avoid_zero_div must go inside sqrt to avoid a divide by zero
        # in the gradient through this operation
        norm = tf.sqrt(
            tf.maximum(
                avoid_zero_div,
                reduce_sum(tf.square(eta), reduc_ind, keepdims=True)))

    # We must *clip* to within the norm ball, not *normalize* onto the
    # surface of the ball
    factor = tf.minimum(1., eps / norm)
    return eta * factor
def kl_with_logits(p_logits, q_logits, scope=None,
                   loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES):
    """
    Compute the KL divergence KL(p || q) from two sets of logits.

    The divergence is summed over axis 1, averaged over the remaining
    entries, and registered through `tf.losses.add_loss`.

    :param p_logits: logits defining the distribution p
    :param q_logits: logits defining the distribution q
    :param scope: optional name scope (defaults to "kl_divergence")
    :param loss_collection: collection passed to `tf.losses.add_loss`
    :return: the mean KL divergence tensor
    """
    with tf.name_scope(scope, "kl_divergence") as op_name:
        probs_p = tf.nn.softmax(p_logits)
        log_probs_p = tf.nn.log_softmax(p_logits)
        log_probs_q = tf.nn.log_softmax(q_logits)

        # sum_k p_k * (log p_k - log q_k), one value per row
        per_row_kl = reduce_sum(probs_p * (log_probs_p - log_probs_q), axis=1)
        kl = reduce_mean(per_row_kl, name=op_name)

        tf.losses.add_loss(kl, loss_collection)
    return kl
def l2_batch_normalize(x, epsilon=1e-12, scope=None):
    """
    Normalize every flattened row of a batch by its L2 norm.

    Each row is first divided by its largest absolute value (plus
    `epsilon`), then by the square root of its sum of squares (plus
    sqrt(`epsilon`)). The result is reshaped back to the input shape.

    :param x: the input placeholder
    :param epsilon: stabilizes division
    :param scope: optional name scope (defaults to "l2_batch_normalize")
    :return: the batch of l2 normalized vector
    """
    with tf.name_scope(scope, "l2_batch_normalize") as op_name:
        original_shape = tf.shape(x)
        flat = tf.contrib.layers.flatten(x)

        # Pre-scale by the per-row max magnitude before squaring.
        row_max = reduce_max(tf.abs(flat), 1, keepdims=True)
        flat = flat / (epsilon + row_max)

        row_sq_sum = reduce_sum(tf.square(flat), 1, keepdims=True)
        inv_norm = tf.rsqrt(np.sqrt(epsilon) + row_sq_sum)
        unit_rows = tf.multiply(flat, inv_norm)
        return tf.reshape(unit_rows, original_shape, op_name)
def spm(x, model, y=None, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=5,
        dy_min=-0.1, dy_max=0.1, n_dys=5, angle_min=-30, angle_max=30,
        n_angles=31, black_border_size=0):
    """
    TensorFlow implementation of the Spatial Transformation Method.

    :param x: the input tensor
    :param model: object exposing `get_probs` and `get_logits`
    :param y: (optional) label tensor. When None, the model's own
              arg-max predictions on `x` are used as labels.
    :param n_samples: (optional) number of (dx, dy, angle) triples to draw
                      at random from the grids. When None, the full
                      Cartesian product of the three grids is used.
    :param dx_min: lower end of the dx grid
    :param dx_max: upper end of the dx grid
    :param n_dxs: number of points in the dx grid
    :param dy_min: lower end of the dy grid
    :param dy_max: upper end of the dy grid
    :param n_dys: number of points in the dy grid
    :param angle_min: lower end of the angle grid
    :param angle_max: upper end of the angle grid
    :param n_angles: number of points in the angle grid
    :param black_border_size: forwarded to `parallel_apply_transformations`
    :return: a tensor for the adversarial example
    """
    if y is None:
        preds = model.get_probs(x)
        # Using model predictions as ground truth to avoid label leaking
        preds_max = reduce_max(preds, 1, keepdims=True)
        y = tf.to_float(tf.equal(preds, preds_max))
        y = tf.stop_gradient(y)
        del preds
    y = y / reduce_sum(y, 1, keepdims=True)

    # Define the range of transformations
    dxs = np.linspace(dx_min, dx_max, n_dxs)
    dys = np.linspace(dy_min, dy_max, n_dys)
    angles = np.linspace(angle_min, angle_max, n_angles)

    if n_samples is None:
        import itertools
        transforms = list(itertools.product(*[dxs, dys, angles]))
    else:
        sampled_dxs = np.random.choice(dxs, n_samples)
        sampled_dys = np.random.choice(dys, n_samples)
        sampled_angles = np.random.choice(angles, n_samples)
        # On Python 3 `zip` yields a one-shot iterator; materialise it so
        # both branches hand the same kind of concrete sequence onwards.
        transforms = list(zip(sampled_dxs, sampled_dys, sampled_angles))

    transformed_ims = parallel_apply_transformations(
        x, transforms, black_border_size)

    def _compute_xent(transformed_x):
        logits = model.get_logits(transformed_x)
        return tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=y, logits=logits)

    all_xents = tf.map_fn(
        _compute_xent,
        transformed_ims,
        parallel_iterations=1)  # Must be 1 to avoid keras race conditions

    # Return the adv_x with worst accuracy

    # all_xents is n_total_samples x batch_size (SB)
    all_xents = tf.stack(all_xents)  # SB

    # We want the worst case sample, with the largest xent_loss
    worst_sample_idx = tf.argmax(all_xents, axis=0)  # B

    batch_size = tf.shape(x)[0]
    keys = tf.stack([
        tf.range(batch_size, dtype=tf.int32),
        tf.cast(worst_sample_idx, tf.int32)
    ], axis=1)
    transformed_ims_bshwc = tf.einsum('sbhwc->bshwc', transformed_ims)
    after_lookup = tf.gather_nd(transformed_ims_bshwc, keys)  # BHWC
    return after_lookup
def generate(self, x, **kwargs):
    """
    Build an unrolled iterative attack graph and save the attacked batches.

    Each iteration normalizes the loss gradient, folds it into running
    averages of the gradient and of the squared gradient (Adam-like,
    without bias correction), takes an `optimize_linear` step, and
    projects the result back into the eps-ball around `x`.

    This method has no `return` statement; its visible output is the PNG
    files written through `save_batch`.

    :param x: The model's symbolic inputs.
    :param kwargs: Keyword arguments forwarded to `parse_params` and
                   `get_or_guess_labels`.
    :raises NotImplementedError: if `self.ord == 1`.
    """
    # NOTE(review): under `python -O` this assert (and therefore the
    # parse_params call inside it) is stripped — confirm that is acceptable.
    assert self.parse_params(**kwargs)

    asserts = []
    # If a data range was specified, check that the input was in that range
    if self.clip_min is not None:
        asserts.append(utils_tf.assert_greater_equal(
            x, tf.cast(self.clip_min,x.dtype)))
    if self.clip_max is not None:
        asserts.append(utils_tf.assert_less_equal(
            x, tf.cast(self.clip_max, x.dtype)))

    # Running first/second moment accumulators and the current iterate
    m_cache = tf.zeros_like(x)
    v_cache = tf.zeros_like(x)
    adv_x = x

    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    # Rescale each label row so it sums to one
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def save_batch(directory, images, labels, iteration, batch_idx):
        # Write one PNG per image; the file name encodes the index within
        # the batch, the batch index, the iteration and the arg-max label.
        for idx, (image, label) in enumerate(zip(images, labels)):
            filename = "id{}_b{}_it{}_l{}.png".format(idx, batch_idx, iteration, np.argmax(label))
            save_image_np(join(directory, filename), image)

    for i in range(self.nb_iter):
        self.logger.debug("Starting #{} iteration".format(i + 1))
        logits = self.model.get_logits(adv_x)
        loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
        if targeted:
            loss = -loss
        grad, = tf.gradients(loss, adv_x)

        # Normalize the gradient by its mean absolute value over all axes
        # except axis 0
        red_ind = list(range(1, len(grad.get_shape())))
        avoid_zero_div = tf.cast(1e-8, grad.dtype)
        grad = grad / tf.maximum(
            avoid_zero_div,
            reduce_mean(tf.abs(grad), red_ind, keepdims=True))

        # Exponential moving averages of the gradient and squared gradient
        m_cache = self.betha1 * m_cache + (1 - self.betha1) * grad
        v_cache = self.betha2 * v_cache + (1 - self.betha2) * tf.square(grad)
        update = tf.divide(m_cache, tf.sqrt(v_cache + avoid_zero_div))

        optimal_perturbation = optimize_linear(update, self.eps_iter, self.ord)
        if self.ord == 1:
            raise NotImplementedError("This attack hasn't been tested for ord=1."
                                      "It's not clear that FGM makes a good inner "
                                      "loop step for iterative optimization since "
                                      "it updates just one coordinate at a time.")

        # Step, then project the total perturbation back into the eps-ball
        adv_x = adv_x + optimal_perturbation
        adv_x = x + utils_tf.clip_eta(adv_x - x, self.ord, self.eps)
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
        adv_x = tf.stop_gradient(adv_x)

        # NOTE(review): the nesting of everything from here down was
        # reconstructed from a whitespace-mangled copy. It is placed inside
        # the iteration loop because `i` is passed to save_batch as the
        # iteration index — confirm against the original layout.
        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        with self.sess.as_default():
            self.sess.run(self.init_op)
            for batch in range(self.nb_batches):
                adv_x_np, y_np = self.sess.run([adv_x, y])
                self.logger.debug("Saving attacked batch #{}".format(batch + 1))
                save_batch(self.adv_dir, adv_x_np, y_np, i, batch)
def fgm(self, x, labels, targeted=False):
    """
    TensorFlow Eager implementation of the Fast Gradient Method.

    :param x: the input variable
    :param labels: the labels passed as `y` to the cross-entropy loss
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect.
                     Targeted will instead try to move in the direction
                     of being more like `labels`.
    :return: a tensor for the adversarial example
    :raises NotImplementedError: if `self.ord` is not np.inf, 1 or 2
    """
    # Reject unsupported norms before doing any gradient work.
    if self.ord not in (np.inf, 1, 2):
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Compute loss
    with tf.GradientTape() as tape:
        # input should be watched because it may be
        # combination of trainable and non-trainable variables
        tape.watch(x)
        loss_obj = LossCrossEntropy(self.model, smoothing=0.)
        loss = loss_obj.fprop(x=x, y=labels)
        if targeted:
            loss = -loss

    # Define gradient of loss wrt input
    grad = tape.gradient(loss, x)

    if self.ord == np.inf:
        # Take sign of gradient
        normalized_grad = tf.sign(grad)
        # The following line should not change the numerical results.
        # It applies only because `normalized_grad` is the output of
        # a `sign` op, which has zero derivative anyway.
        # It should not be applied for the other norms, where the
        # perturbation has a non-zero derivative.
        normalized_grad = tf.stop_gradient(normalized_grad)
    else:
        # Built-in `range` works on both Python 2 and 3, unlike `xrange`.
        red_ind = list(range(1, len(x.get_shape())))
        avoid_zero_div = 1e-12
        if self.ord == 1:
            avoid_nan_norm = tf.maximum(
                avoid_zero_div,
                reduce_sum(tf.abs(grad),
                           reduction_indices=red_ind,
                           keepdims=True))
            normalized_grad = grad / avoid_nan_norm
        else:
            square = tf.maximum(
                avoid_zero_div,
                reduce_sum(tf.square(grad),
                           reduction_indices=red_ind,
                           keepdims=True))
            normalized_grad = grad / tf.sqrt(square)

    # Multiply by constant epsilon
    scaled_grad = self.eps * normalized_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + scaled_grad

    # If clipping is needed
    # reset all values outside of [clip_min, clip_max]
    if (self.clip_min is not None) and (self.clip_max is not None):
        adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
    return adv_x
def __init__(self, sess, model, beta, decision_rule, batch_size, confidence, targeted, learning_rate, binary_search_steps, max_iterations, abort_early, initial_const, clip_min, clip_max, num_labels, shape):
    """
    EAD Attack

    Build the TensorFlow graph for the elastic-net attack: the state
    variables, the shrinkage-thresholding update ops, the distance and
    loss tensors, the optimizer step and the initializer.

    Return a tensor that constructs adversarial examples for the given
    input. Generate uses tf.py_func in order to operate over tensors.

    :param sess: a TF session.
    :param model: a cleverhans.model.Model object.
    :param beta: Trades off L2 distortion with L1 distortion: higher
                 produces examples with lower L1 distortion, at the
                 cost of higher L2 (and typically Linf) distortion
    :param decision_rule: EN or L1. Select final adversarial example from
                          all successful examples based on the least
                          elastic-net or L1 distortion criterion.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: Confidence of adversarial examples: higher produces
                       examples with larger l2 distortion, but more
                       strongly classified as adversarial.
    :param targeted: boolean controlling the behavior of the adversarial
                     examples produced. If set to False, they will be
                     misclassified in any wrong class. If set to True,
                     they will be misclassified in a chosen target class.
    :param learning_rate: The learning rate for the attack algorithm.
                          Smaller values produce better results but are
                          slower to converge.
    :param binary_search_steps: The number of times we perform binary
                                search to find the optimal tradeoff-
                                constant between norm of the perturbation
                                and confidence of the classification. Set
                                'initial_const' to a large value and fix
                                this param to 1 for speed.
    :param max_iterations: The maximum number of iterations. Setting this
                           to a larger value will produce lower distortion
                           results. Using only a few iterations requires
                           a larger learning rate, and will produce larger
                           distortion results.
    :param abort_early: If true, allows early abort when the total
                        loss starts to increase (greatly speeds up attack,
                        but hurts performance, particularly on ImageNet)
    :param initial_const: The initial tradeoff-constant to use to tune the
                          relative importance of size of the perturbation
                          and confidence of classification.
                          If binary_search_steps is large, the initial
                          constant is not important. A smaller value of
                          this constant gives lower distortion results.
                          For computational efficiency, fix
                          binary_search_steps to 1 and set this param
                          to a large value.
    :param clip_min: (optional float) Minimum input component value.
    :param clip_max: (optional float) Maximum input component value.
    :param num_labels: the number of classes in the model's output.
    :param shape: the shape of the model's input tensor.
    """
    # Store the configuration on the instance
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model
    self.decision_rule = decision_rule

    self.beta = beta
    self.beta_t = tf.cast(self.beta, tf_dtype)

    self.repeat = binary_search_steps >= 10

    # Prepend the batch dimension; `shape` is rebound for the rest of the body
    self.shape = shape = tuple([batch_size] + list(shape))

    # these are variables to be more efficient in sending data to tf
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.newimg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='newimg')
    self.slack = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='slack')
    self.tlab = tf.Variable(np.zeros((batch_size, num_labels)), dtype=tf_dtype, name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype, name='const')

    # and here's what we use to assign them
    self.assign_timg = tf.compat.v1.placeholder(tf_dtype, shape, name='assign_timg')
    # NOTE(review): the two placeholders bound to assign_newimg and
    # assign_slack are rebound to computed tensors further down, so these
    # placeholder objects are no longer reachable through `self`.
    self.assign_newimg = tf.compat.v1.placeholder(tf_dtype, shape, name='assign_newimg')
    self.assign_slack = tf.compat.v1.placeholder(tf_dtype, shape, name='assign_slack')
    self.assign_tlab = tf.compat.v1.placeholder(tf_dtype, (batch_size, num_labels), name='assign_tlab')
    self.assign_const = tf.compat.v1.placeholder(tf_dtype, [batch_size], name='assign_const')

    self.global_step = tf.Variable(0, trainable=False)
    self.global_step_t = tf.cast(self.global_step, tf_dtype)

    # Fast Iterative Shrinkage Thresholding
    # zt = step / (step + 3) weights the extrapolation term below
    self.zt = tf.divide(self.global_step_t, self.global_step_t + tf.cast(3, tf_dtype))
    # Three mutually exclusive 0/1 masks on (slack - timg):
    # above +beta, within [-beta, +beta], below -beta
    cond1 = tf.cast(
        tf.greater(tf.subtract(self.slack, self.timg), self.beta_t),
        tf_dtype)
    cond2 = tf.cast(
        tf.less_equal(tf.abs(tf.subtract(self.slack, self.timg)), self.beta_t),
        tf_dtype)
    cond3 = tf.cast(
        tf.less(tf.subtract(self.slack, self.timg), tf.negative(self.beta_t)),
        tf_dtype)

    # Shrink slack by beta towards timg, bounded by clip_max / clip_min
    upper = tf.minimum(tf.subtract(self.slack, self.beta_t), tf.cast(self.clip_max, tf_dtype))
    lower = tf.maximum(tf.add(self.slack, self.beta_t), tf.cast(self.clip_min, tf_dtype))

    # New image: shrunk value outside the beta band, original image inside
    self.assign_newimg = tf.multiply(cond1, upper)
    self.assign_newimg += tf.multiply(cond2, self.timg)
    self.assign_newimg += tf.multiply(cond3, lower)

    # New slack: new image plus zt times the change of the image
    self.assign_slack = self.assign_newimg
    self.assign_slack += tf.multiply(self.zt, self.assign_newimg - self.newimg)

    # --------------------------------
    self.setter = tf.compat.v1.assign(self.newimg, self.assign_newimg)
    self.setter_y = tf.compat.v1.assign(self.slack, self.assign_slack)

    # prediction BEFORE-SOFTMAX of the model
    self.output = model.get_logits(self.newimg)
    self.output_y = model.get_logits(self.slack)

    # distance to the input data, reduced over every axis except axis 0
    self.l2dist = reduce_sum(tf.square(self.newimg - self.timg), list(range(1, len(shape))))
    self.l2dist_y = reduce_sum(tf.square(self.slack - self.timg), list(range(1, len(shape))))
    self.l1dist = reduce_sum(tf.abs(self.newimg - self.timg), list(range(1, len(shape))))
    self.l1dist_y = reduce_sum(tf.abs(self.slack - self.timg), list(range(1, len(shape))))
    self.elasticdist = self.l2dist + tf.multiply(self.l1dist, self.beta_t)
    self.elasticdist_y = self.l2dist_y + tf.multiply(
        self.l1dist_y, self.beta_t)

    # Criterion used to pick the final example ('EN' -> elastic-net,
    # anything else -> L1)
    if self.decision_rule == 'EN':
        self.crit = self.elasticdist
        self.crit_p = 'Elastic'
    else:
        self.crit = self.l1dist
        self.crit_p = 'L1'

    # compute the probability of the label class versus the maximum other
    real = reduce_sum((self.tlab) * self.output, 1)
    real_y = reduce_sum((self.tlab) * self.output_y, 1)
    # Subtracting tlab * 10000 pushes the labelled class out of the max
    other = reduce_max((1 - self.tlab) * self.output - (self.tlab * 10000), 1)
    other_y = reduce_max(
        (1 - self.tlab) * self.output_y - (self.tlab * 10000), 1)

    if self.TARGETED:
        # if targeted, optimize for making the other class most likely
        loss1 = tf.maximum(ZERO(), other - real + self.CONFIDENCE)
        loss1_y = tf.maximum(ZERO(), other_y - real_y + self.CONFIDENCE)
    else:
        # if untargeted, optimize for making this class least likely.
        loss1 = tf.maximum(ZERO(), real - other + self.CONFIDENCE)
        loss1_y = tf.maximum(ZERO(), real_y - other_y + self.CONFIDENCE)

    # sum up the losses
    self.loss21 = reduce_sum(self.l1dist)
    self.loss21_y = reduce_sum(self.l1dist_y)
    self.loss2 = reduce_sum(self.l2dist)
    self.loss2_y = reduce_sum(self.l2dist_y)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss1_y = reduce_sum(self.const * loss1_y)
    # loss_opt (evaluated at slack, no L1 term) is what the optimizer
    # minimizes; loss (evaluated at newimg) also includes the L1 term
    self.loss_opt = self.loss1_y + self.loss2_y
    self.loss = self.loss1 + self.loss2 + tf.multiply(
        self.beta_t, self.loss21)

    self.learning_rate = tf.compat.v1.train.polynomial_decay(
        self.LEARNING_RATE,
        self.global_step,
        self.MAX_ITERATIONS,
        0,
        power=0.5)

    # Setup the optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.compat.v1.global_variables())
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(
        self.learning_rate)
    self.train = optimizer.minimize(self.loss_opt,
                                    var_list=[self.slack],
                                    global_step=self.global_step)
    end_vars = tf.compat.v1.global_variables()
    # Global variables that appeared while the optimizer step was built
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.timg.assign(self.assign_timg))
    self.setup.append(self.tlab.assign(self.assign_tlab))
    self.setup.append(self.const.assign(self.assign_const))

    var_list = [self.global_step] + [self.slack] + [self.newimg] + new_vars
    self.init = tf.compat.v1.variables_initializer(var_list=var_list)
def body(x_in, y_in, domain_in, i_in, cond_in):
    """
    One step of a saliency-based pairwise feature perturbation.

    Reads `model`, `nb_classes`, `nb_features`, `increase`, `tf_dtype`,
    `zero_diagonal`, `theta`, `clip_min` and `clip_max` from the enclosing
    scope, and increments the module-level `it_count`.

    :param x_in: current inputs
    :param y_in: target labels (passed through unchanged)
    :param domain_in: search-domain mask; an entry equal to 0 marks a
                      feature as already used
    :param i_in: iteration counter
    :param cond_in: unused in this body
    :return: (x_out, y_in, domain_out, i_out, cond_out)
    """
    # Create graph for model logits and predictions
    logits = model.get_logits(x_in)
    preds = tf.nn.softmax(logits)
    preds_onehot = tf.one_hot(tf.argmax(preds, axis=1), depth=nb_classes)

    # create the Jacobian graph: one gradient of the class logit w.r.t.
    # the input per class
    list_derivatives = []
    for class_ind in xrange(nb_classes):
        derivatives = tf.gradients(logits[:, class_ind], x_in)
        list_derivatives.append(derivatives[0])
    grads = tf.reshape(
        tf.stack(list_derivatives), shape=[nb_classes, -1, nb_features])

    # Compute the Jacobian components
    # To help with the computation later, reshape the target_class
    # and other_class to [nb_classes, -1, 1].
    # The last dimension is added to allow broadcasting later.
    target_class = tf.reshape(
        tf.transpose(y_in, perm=[1, 0]), shape=[nb_classes, -1, 1])
    other_classes = tf.cast(tf.not_equal(target_class, 1), tf_dtype)

    grads_target = reduce_sum(grads * target_class, axis=0)
    grads_mine = (grads*other_classes)
    grads_other = reduce_sum(grads * other_classes, axis=0)

    # target class is ignored because other_classes is 0 where
    # the target class is. and target class must be greater than
    # zero.
    # Max over the class axis with keepdims=True, i.e. [1, -1, nb_features]
    max_others = reduce_max(grads_mine, 0, True)

    # Remove the already-used input features from the search space
    # Subtract 2 times the maximum value from those value so that
    # they won't be picked later
    increase_coef = (4 * int(increase) - 2) \
        * tf.cast(tf.equal(domain_in, 0), tf_dtype)

    target_tmp = grads_target
    target_tmp -= increase_coef \
        * reduce_max(tf.abs(grads_target), axis=1, keepdims=True)
    # Pairwise sums: entry [b, i, j] is target_tmp[b, i] + target_tmp[b, j]
    target_sum = tf.reshape(target_tmp, shape=[-1, nb_features, 1]) \
        + tf.reshape(target_tmp, shape=[-1, 1, nb_features])

    other_tmp = grads_other
    other_tmp += increase_coef \
        * reduce_max(tf.abs(grads_other), axis=1, keepdims=True)
    other_sum = tf.reshape(other_tmp, shape=[-1, nb_features, 1]) \
        + tf.reshape(other_tmp, shape=[-1, 1, nb_features])

    # Create a mask to only keep features that match conditions
    if increase:
        # NOTE(review): target_sum is [-1, nb_features, nb_features] while
        # max_others is [1, -1, nb_features]; this comparison looks like it
        # only broadcasts when the batch dimension is 1 (or happens to
        # equal nb_features) — confirm the intended shapes.
        scores_mask = ((target_sum > 0) & (other_sum < 0 ) & (target_sum > max_others))
    else:
        scores_mask = ((target_sum < 0) & (other_sum > 0) )

    # NOTE(review): nesting reconstructed from a whitespace-mangled copy;
    # the counter update is assumed to sit at function level rather than
    # inside the `else` branch — confirm. It runs each time this Python
    # function is called.
    global it_count
    it_count = it_count +1

    # Create a 2D numpy array of scores for each pair of candidate features
    scores = tf.cast(scores_mask, tf_dtype) \
        * (-target_sum * other_sum) * zero_diagonal

    # Extract the best two pixels: arg-max over the flattened pair grid,
    # then split the flat index back into its two feature indices
    best = tf.argmax(
        tf.reshape(scores, shape=[-1, nb_features * nb_features]), axis=1)

    p1 = tf.mod(best, nb_features)
    p2 = tf.floordiv(best, nb_features)
    p1_one_hot = tf.one_hot(p1, depth=nb_features)
    p2_one_hot = tf.one_hot(p2, depth=nb_features)

    # Check if more modification is needed for each sample
    mod_not_done = tf.equal(reduce_sum(y_in * preds_onehot, axis=1), 0)
    cond = mod_not_done & (reduce_sum(domain_in, axis=1) >= 2)

    # Update the search domain
    cond_float = tf.reshape(tf.cast(cond, tf_dtype), shape=[-1, 1])
    to_mod = (p1_one_hot + p2_one_hot) * cond_float

    domain_out = domain_in - to_mod

    # Apply the modification to the images
    to_mod_reshape = tf.reshape(
        to_mod, shape=([-1] + x_in.shape[1:].as_list()))
    if increase:
        x_out = tf.minimum(clip_max, x_in + to_mod_reshape * theta)
    else:
        x_out = tf.maximum(clip_min, x_in - to_mod_reshape * theta)

    # Increase the iterator, and check if all misclassifications are done
    i_out = tf.add(i_in, 1)
    cond_out = reduce_any(cond)

    return x_out, y_in, domain_out, i_out, cond_out
def generate(self, x, **kwargs):
    """
    Generate symbolic graph for adversarial examples and return.

    :param x: The model's symbolic inputs.
    :param kwargs: Keyword arguments. See `parse_params` for documentation.
    :return: a tensor for the adversarial example
    :raises AssertionError: if `parse_params` returns a falsy value.
    :raises NotImplementedError: if `self.ord == 1`.
    """
    # Parse and save attack-specific parameters. This is an explicit check
    # rather than an `assert` statement so that the parse_params call still
    # happens when Python runs with -O (which strips asserts).
    if not self.parse_params(**kwargs):
        raise AssertionError("parse_params rejected the supplied arguments")

    # ord=1 is unsupported; fail before any graph ops are created instead
    # of from inside the loop body.
    if self.ord == 1:
        raise NotImplementedError("This attack hasn't been tested for ord=1."
                                  "It's not clear that FGM makes a good inner "
                                  "loop step for iterative optimization since "
                                  "it updates just one coordinate at a time.")

    asserts = []

    # If a data range was specified, check that the input was in that range
    if self.clip_min is not None:
        asserts.append(utils_tf.assert_greater_equal(
            x, tf.cast(self.clip_min, x.dtype)))

    if self.clip_max is not None:
        asserts.append(utils_tf.assert_less_equal(
            x, tf.cast(self.clip_max, x.dtype)))

    # Initialize loop variables
    momentum = tf.zeros_like(x)
    adv_x = x

    # Fix labels to the first model predictions for loss computation
    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def cond(i, _, __):
        """Iterate until number of iterations completed"""
        return tf.less(i, self.nb_iter)

    def body(i, ax, m):
        """Do a momentum step"""
        logits = self.model.get_logits(ax)
        loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
        if targeted:
            loss = -loss

        # Define gradient of loss wrt input
        grad, = tf.gradients(loss, ax)

        # Filter the gradient with self.kernel before normalizing it
        grad = tf.nn.depthwise_conv2d(
            grad, self.kernel, strides=[1, 1, 1, 1], padding='SAME')

        # Normalize current gradient and add it to the accumulated gradient
        red_ind = list(range(1, len(grad.get_shape())))
        avoid_zero_div = tf.cast(1e-12, grad.dtype)
        grad = grad / tf.maximum(
            avoid_zero_div,
            reduce_mean(tf.abs(grad), red_ind, keepdims=True))
        m = self.decay_factor * m + grad

        optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

        # Update and clip adversarial example in current iteration
        ax = ax + optimal_perturbation
        ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

        if self.clip_min is not None and self.clip_max is not None:
            ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

        ax = tf.stop_gradient(ax)

        return i + 1, ax, m

    _, adv_x, _ = tf.while_loop(
        cond, body, (tf.zeros([]), adv_x, momentum), back_prop=True,
        maximum_iterations=self.nb_iter)

    if self.sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
def __init__(self, sess, model, reconstructor, batch_size, confidence, targeted, learning_rate, binary_search_steps, max_iterations, abort_early, initial_const, clip_min, clip_max, num_labels, shape):
    """
    Build the attack graph for a model placed behind a reconstructor.

    The candidate image is passed through `reconstructor.reconstruct`, the
    logits and the L2 distance are computed on the reconstructed image,
    and the gradient of the loss with respect to the reconstructed image
    is applied directly to the `modifier` variable.

    :param sess: a TF session.
    :param model: object exposing `get_logits`.
    :param reconstructor: object exposing
                          `reconstruct(img, batch_size=...)`; element [0]
                          of its result is used.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: margin added to the logit difference in the loss.
    :param targeted: if true, push towards the class in `tlab`; otherwise
                     push away from it.
    :param learning_rate: learning rate of the Adam optimizer.
    :param binary_search_steps: stored; `repeat` is set when it is >= 10.
    :param max_iterations: stored as MAX_ITERATIONS.
    :param abort_early: stored as ABORT_EARLY.
    :param initial_const: stored as initial_const.
    :param clip_min: Minimum input component value.
    :param clip_max: Maximum input component value.
    :param num_labels: the number of classes in the model's output.
    :param shape: the shape of the model's input tensor (without batch).
    """
    # Store the configuration on the instance
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model
    self.reconstructor = reconstructor

    self.repeat = binary_search_steps >= 10

    # Prepend the batch dimension; `shape` is rebound for the rest of the body
    self.shape = shape = tuple([batch_size] + list(shape))

    # the variable we're going to optimize over
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # these are variables to be more efficient in sending data to tf
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.tlab = tf.Variable(np.zeros((batch_size, num_labels)), dtype=tf_dtype, name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype, name='const')

    # and here's what we use to assign them
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(tf_dtype, (batch_size, num_labels), name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size], name='assign_const')

    # the resulting instance, tanh'd to keep bounded from clip_min
    # to clip_max
    self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
    self.newimg = self.newimg * (clip_max - clip_min) + clip_min

    # Reconstruct the candidate; stop_gradient blocks backprop through the
    # reconstructor, then the output is mapped into [clip_min, clip_max]
    # with the same tanh rescaling
    recon_img = tf.stop_gradient(
        self.reconstructor.reconstruct(self.newimg, batch_size=batch_size)[0])
    recon_img = (tf.tanh(recon_img) + 1) / 2 * (clip_max - clip_min) + clip_min

    # prediction BEFORE-SOFTMAX of the model
    self.output = model.get_logits(recon_img)

    # distance to the input data
    self.other = (tf.tanh(self.timg) + 1) / \
        2 * (clip_max - clip_min) + clip_min
    # Disabled alternative: distance measured on newimg instead of recon_img
    #self.l2dist = reduce_sum(
    #    tf.square(self.newimg - self.other), list(range(1, len(shape))))
    self.l2dist = reduce_sum(tf.square(recon_img - self.other), list(range(1, len(shape))))

    # compute the probability of the label class versus the maximum other
    real = reduce_sum((self.tlab) * self.output, 1)
    # Subtracting tlab * 10000 pushes the labelled class out of the max
    other = reduce_max((1 - self.tlab) * self.output - self.tlab * 10000, 1)

    if self.TARGETED:
        # if targeted, optimize for making the other class most likely
        loss1 = tf.maximum(ZERO(), other - real + self.CONFIDENCE)
    else:
        # if untargeted, optimize for making this class least likely.
        loss1 = tf.maximum(ZERO(), real - other + self.CONFIDENCE)

    # sum up the losses
    self.loss2 = reduce_sum(self.l2dist)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Setup the adam optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    # Take the gradient with respect to the reconstructed image and apply
    # it to `modifier`, since stop_gradient cuts the path back to it
    grads_and_vars = optimizer.compute_gradients(self.loss, [recon_img])
    grads_and_vars = [(grads_and_vars[0][0], modifier)]
    self.train = optimizer.apply_gradients(grads_and_vars)
    # Disabled alternative: direct minimization over modifier
    #self.train = optimizer.minimize(self.loss, var_list=[modifier])
    end_vars = tf.global_variables()
    # Global variables that appeared while the optimizer step was built
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.timg.assign(self.assign_timg))
    self.setup.append(self.tlab.assign(self.assign_tlab))
    self.setup.append(self.const.assign(self.assign_const))

    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
def __init__(self, sess, model, cl_model, batch_size, confidence, targeted, learning_rate, binary_search_steps, max_iterations, abort_early, initial_const, clip_min, clip_max, num_labels, shape):
    """
    Build the attack graph that matches latent codes of two images.

    The loss combines the squared distance between the outputs of the
    layer named "latent" for the candidate image and for a target image
    with the L2 distance between the candidate and the original image.

    :param sess: a TF session.
    :param model: model exposing `input`, `get_layer` and `predict`.
    :param cl_model: classifier applied to the latent codes.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: stored as CONFIDENCE (not used in the loss here).
    :param targeted: stored as TARGETED (not used in the loss here).
    :param learning_rate: learning rate of the Adam optimizer.
    :param binary_search_steps: stored; `repeat` is set when it is >= 10.
    :param max_iterations: stored as MAX_ITERATIONS.
    :param abort_early: stored as ABORT_EARLY.
    :param initial_const: stored as initial_const.
    :param clip_min: Minimum input component value.
    :param clip_max: Maximum input component value.
    :param num_labels: unused in the active code of this body.
    :param shape: the shape of the model's input tensor (without batch).
    """
    # Store the configuration on the instance
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model
    self.cl_model = cl_model
    # Sub-model from the model input to the output of its "latent" layer
    latent_layer_model = Model(inputs=model.input, outputs=model.get_layer("latent").output)
    self.repeat = binary_search_steps >= 10

    # Prepend the batch dimension; `shape` is rebound for the rest of the body
    self.shape = shape = tuple([batch_size] + list(shape))
    #print("shape: ", shape)

    # the variable we're going to optimize over
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # these are variables to be more efficient in sending data to tf
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.targimg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='targimg')
    #self.tlab = tf.Variable(
    #    np.zeros((batch_size, num_labels)), dtype=tf_dtype, name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype, name='const')

    # and here's what we use to assign them
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_targimg = tf.placeholder(tf_dtype, shape, name='assign_targimg')
    #self.assign_tlab = tf.placeholder(
    #    tf_dtype, (batch_size, num_labels), name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size], name='assign_const')

    # the resulting instance, tanh'd to keep bounded from clip_min
    # to clip_max
    self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
    self.newimg = self.newimg * (clip_max - clip_min) + clip_min

    # Latent code of the target image, model output and latent code of the
    # candidate image
    targimg_lat = latent_layer_model.predict(self.targimg)
    self.x_hat = model.predict(self.newimg, steps=1)
    self.x_hat_lat = latent_layer_model.predict(self.newimg)

    # NOTE(review): this calls `cl_model.prediction` while the target
    # branch two lines below calls `cl_model.predict` — confirm that
    # `prediction` exists on cl_model and is not a typo.
    self.y_hat_logit = cl_model.prediction(self.x_hat_lat, steps=1)
    self.y_hat = tf.argmax(self.y_hat_logit, axis=1)

    self.y_targ_logit = cl_model.predict(targimg_lat, steps=1)
    self.y_targ = tf.argmax(self.y_targ_logit, axis=1)

    # distance to the input data
    self.other = (tf.tanh(self.timg) + 1) / 2
    self.other = self.other * (clip_max - clip_min) + clip_min
    self.l2dist = reduce_sum(tf.square(self.newimg - self.other), list(range(1, len(shape))))
    print("shape of l2_dist: ", np.shape(self.l2dist))
    # NOTE(review): `epsilon` is assigned but never read in this body.
    epsilon = 10e-8

    # Squared latent distance, reduced over all axes by this call
    loss1 = reduce_sum(tf.square(self.x_hat_lat - targimg_lat))

    # sum up the losses
    self.loss2 = reduce_sum(self.l2dist)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Setup the adam optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[modifier])
    end_vars = tf.global_variables()
    # Global variables that appeared while the optimizer step was built
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.timg.assign(self.assign_timg))
    self.setup.append(self.targimg.assign(self.assign_targimg))
    #self.setup.append(self.tlab.assign(self.assign_tlab))
    self.setup.append(self.const.assign(self.assign_const))

    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
def __init__(self, sess, model, ensemble, batch_size, confidence, targeted, learning_rate, binary_search_steps, max_iterations, abort_early, initial_const, clip_min, clip_max, num_labels, shape):
    """
    Build the attack graph for a classifier guarded by per-class nets.

    The classification loss is the element-wise maximum of the margin loss
    on `model` and a margin loss on the two-logit networks in `ensemble`
    that belong to the selected class; an L2 distance term is added.

    :param sess: a TF session.
    :param model: object exposing `get_logits`.
    :param ensemble: sequence indexed by class; each entry is a sequence
                     of networks exposing `get_logits`.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: margin added to the logit differences in the loss.
    :param targeted: if true, the class is taken from `tlab`; otherwise
                     from the model's arg-max on the original image.
    :param learning_rate: learning rate of the Adam optimizer.
    :param binary_search_steps: stored; `repeat` is set when it is >= 10.
    :param max_iterations: stored as MAX_ITERATIONS.
    :param abort_early: stored as ABORT_EARLY.
    :param initial_const: stored as initial_const.
    :param clip_min: Minimum input component value.
    :param clip_max: Maximum input component value.
    :param num_labels: the number of classes in the model's output.
    :param shape: the shape of the model's input tensor (without batch).
    """
    # Store the configuration on the instance
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model
    self.ensemble = ensemble

    self.repeat = binary_search_steps >= 10

    # Prepend the batch dimension; `shape` is rebound for the rest of the body
    self.shape = shape = tuple([batch_size] + list(shape))

    # the variable we're going to optimize over
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # these are variables to be more efficient in sending data to tf
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.tlab = tf.Variable(np.zeros((batch_size, num_labels)), dtype=tf_dtype, name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype, name='const')

    # and here's what we use to assign them
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(tf_dtype, (batch_size, num_labels), name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size], name='assign_const')

    # the resulting instance, tanh'd to keep bounded from clip_min
    # to clip_max
    self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
    self.newimg = self.newimg * (clip_max - clip_min) + clip_min

    # prediction BEFORE-SOFTMAX of the model
    self.output = model.get_logits(self.newimg)

    # distance to the input data
    self.other = (tf.tanh(self.timg) + 1) / \
        2 * (clip_max - clip_min) + clip_min
    self.l2dist = reduce_sum(tf.square(self.newimg - self.other), list(range(1, len(shape))))

    # compute the probability of the label class versus the maximum other
    real = reduce_sum((self.tlab) * self.output, 1)
    # Subtracting tlab * 10000 pushes the labelled class out of the max
    other = reduce_max((1 - self.tlab) * self.output - self.tlab * 10000, 1)

    if self.TARGETED:
        # if targeted, optimize for making the other class most likely
        loss1 = tf.maximum(ZERO(), other - real + self.CONFIDENCE)
    else:
        # if untargeted, optimize for making this class least likely.
        loss1 = tf.maximum(ZERO(), real - other + self.CONFIDENCE)

    # sum up the losses
    self.loss2 = reduce_sum(self.l2dist)

    # ==================== Add ensemble part ==================== #

    # Get the number of small nets for each class
    self.n_nets = np.array([len(x) for x in self.ensemble])
    # Max number of small nets in one class
    n_nets_max = np.max(self.n_nets)

    # Gather all outputs from the ensemble
    all_nets = []
    for i in range(num_labels):
        class_nets = []
        for j in range(n_nets_max):
            if j < self.n_nets[i]:
                class_nets.append(self.ensemble[i][j].get_logits(
                    self.newimg))
            else:
                # Padding: append [0, 0] for classes that have the number
                # of NNs less than n_nets_max
                # NOTE(review): a zero pad gives a logit difference of 0
                # for that slot, so it takes part in the reduce_max over
                # nets below — confirm that is intended.
                class_nets.append(tf.zeros([batch_size, 2]))
        # Stack this class's nets along a new axis 1
        all_nets.append(tf.stack(class_nets, axis=1))
    # Stack the per-class tensors, again along axis 1
    self.ensemble_out = tf.stack(all_nets, axis=1)

    # Based on output, see which set of the ensemble to use
    # Find label/class to look for in ensemble
    if self.TARGETED:
        label = tf.argmax(self.tlab, axis=1)
    else:
        # Output of original image
        self.orig_output = model.get_logits(self.other)
        label = tf.argmax(self.orig_output, axis=1)
    ind = tf.range(batch_size, dtype=tf.int64)
    ind_label = tf.stack([ind, label], axis=1)
    # Use gather_nd to do numpy slicing: for each batch row pick the nets
    # of that row's class
    self.label_nets = tf.gather_nd(self.ensemble_out, ind_label)

    # Get the loss function for the small net part: difference between
    # the two logits of every selected net, with the sign set by TARGETED
    if self.TARGETED:
        diff = self.label_nets[:, :, 0] - self.label_nets[:, :, 1]
    else:
        diff = self.label_nets[:, :, 1] - self.label_nets[:, :, 0]
    # Find the largest difference among small nets
    max_diff = tf.reduce_max(diff, axis=1)
    # Add confidence margin and clip at zero
    ensemble_loss = tf.maximum(ZERO(), max_diff + self.CONFIDENCE)

    # The objective function only includes max(clf_loss, any_ensemble_loss)
    loss1 = tf.maximum(loss1, tf.squeeze(ensemble_loss))
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Setup the adam optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[modifier])
    end_vars = tf.global_variables()
    # Global variables that appeared while the optimizer step was built
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.timg.assign(self.assign_timg))
    self.setup.append(self.tlab.assign(self.assign_tlab))
    self.setup.append(self.const.assign(self.assign_const))

    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
def __init__(self, sess, model, cl_model, batch_size, confidence, targeted,
             learning_rate, binary_search_steps, max_iterations, abort_early,
             initial_const, clip_min, clip_max, num_labels, shape):
    """
    Build the TF graph for an attack on an autoencoder.

    The objective is the squared error between the autoencoder's
    reconstruction of the perturbed image and a target image (weighted by
    ``const``) plus the squared L2 size of the perturbation.

    :param sess: a TF session; the frozen autoencoder is imported into
                 ``sess.graph``.
    :param model: model object exposing ``outputs``; it is frozen with
                  ``freeze_session`` and re-imported as constants.
    :param cl_model: stored as ``self.cl_model``; not used while building
                     this graph.
    :param batch_size: Number of attacks to run simultaneously; leading
                       dimension of every variable created here.
    :param confidence: stored as ``self.CONFIDENCE``; not used by this
                       objective.
    :param targeted: stored as ``self.TARGETED``; not used by this
                     objective.
    :param learning_rate: learning rate handed to the Adam optimizer.
    :param binary_search_steps: stored on the instance; ``self.repeat`` is
                                set when it is at least 10.
    :param max_iterations: stored as ``self.MAX_ITERATIONS``.
    :param abort_early: stored as ``self.ABORT_EARLY``.
    :param initial_const: stored as ``self.initial_const``.
    :param clip_min: lower bound of the range the tanh-squashed image is
                     rescaled to.
    :param clip_max: upper bound of that range.
    :param num_labels: unused here; kept so the signature matches the other
                       attack constructors.
    :param shape: shape of one input example (without the batch dimension).
    """
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model
    self.cl_model = cl_model

    self.repeat = binary_search_steps >= 10

    self.shape = shape = tuple([batch_size] + list(shape))

    # the variable we're going to optimize over
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # these are variables to be more efficient in sending data to tf
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.targimg = tf.Variable(np.zeros(shape), dtype=tf_dtype,
                               name='targimg')
    self.const = tf.Variable(
        np.zeros(batch_size), dtype=tf_dtype, name='const')

    # and here's what we use to assign them
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_targimg = tf.placeholder(tf_dtype, shape,
                                         name='assign_targimg')
    self.assign_const = tf.placeholder(
        tf_dtype, [batch_size], name='assign_const')

    # the resulting instance, tanh'd to keep bounded from clip_min
    # to clip_max
    self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
    self.newimg = self.newimg * (clip_max - clip_min) + clip_min

    # Freeze the autoencoder to constants and persist it, then read the
    # serialized GraphDef back so it can be grafted onto self.newimg.
    frozen_graph = freeze_session(
        K.get_session(),
        output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, "../cleverhans_tutorials/models",
                         "tf_model_AE.pb", as_text=False)

    from tensorflow.python.platform import gfile
    graph_def = tf.GraphDef()
    # `with` guarantees the handle is released even if parsing fails.
    with gfile.GFile("../cleverhans_tutorials/models/tf_model_AE.pb",
                     'rb') as f:
        graph_def.ParseFromString(f.read())

    # Feed the adversarial image into the imported autoencoder so that the
    # reconstruction (and hence the loss) is differentiable w.r.t.
    # `modifier`.  Taking the output via return_elements avoids depending
    # on the 'import/' name-scope prefix, which TF uniquifies on repeated
    # imports.
    # NOTE(review): the tensor names are specific to the frozen model
    # ('input_1:0' is taken from earlier experiments in this file) --
    # confirm they match the autoencoder being attacked.
    with sess.graph.as_default():
        reconstruction_tensor, = tf.import_graph_def(
            graph_def,
            input_map={'input_1:0': self.newimg},
            return_elements=['activation_7/Sigmoid:0'])
    self.x_hat = reconstruction_tensor

    # distance to the input data
    self.other = (tf.tanh(self.timg) + 1) / 2
    self.other = self.other * (clip_max - clip_min) + clip_min
    self.l2dist = reduce_sum(
        tf.square(self.newimg - self.other), list(range(1, len(shape))))
    print("shape of l2_dist: ", np.shape(self.l2dist))

    # Reconstruction error against the target image.
    # NOTE(review): this reduces over the whole batch, so every entry of
    # `const` scales the same scalar -- confirm that is intended.
    loss1 = reduce_sum(tf.square(self.x_hat - self.targimg))

    # sum up the losses
    self.loss2 = reduce_sum(self.l2dist)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Setup the adam optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[modifier])
    end_vars = tf.global_variables()
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.timg.assign(self.assign_timg))
    self.setup.append(self.targimg.assign(self.assign_targimg))
    self.setup.append(self.const.assign(self.assign_const))

    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
def fgm(x, logits, y=None, eps=0.3, ord=np.inf, clip_min=None,
        clip_max=None, clip_grad=False, targeted=False, sanity_checks=True):
    """
    Build the Fast Gradient Method adversarial example in TensorFlow.

    :param x: the input placeholder
    :param logits: output of model.get_logits (pre-softmax)
    :param y: (optional) one-hot labels. For a targeted attack this is the
              target label. When omitted, the model's own predictions are
              used as labels, which avoids the "label leaking" effect
              (https://arxiv.org/abs/1611.01236).
    :param eps: the epsilon (input variation parameter)
    :param ord: (optional) Order of the norm (mimics NumPy).
                Possible values: np.inf, 1 or 2.
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components
    :param clip_grad: (optional bool) Zero gradient components at positions
                      where the input already sits on the domain boundary
                      and the step would be clipped away.
    :param targeted: If False (default) the loss is ascended to make the
                     label incorrect; if True the loss is negated so the
                     step moves towards y.
    :param sanity_checks: If True, the input-range assertions are attached
                          to the result as control dependencies.
    :return: a tensor for the adversarial example
    """
    # Range assertions on the input, one per bound that was supplied.
    range_checks = []
    if clip_min is not None:
        lower = tf.cast(clip_min, x.dtype)
        range_checks.append(utils_tf.assert_greater_equal(x, lower))
    if clip_max is not None:
        upper = tf.cast(clip_max, x.dtype)
        range_checks.append(utils_tf.assert_less_equal(x, upper))

    # Guard against probabilities being passed where logits are expected.
    assert logits.op.type != 'Softmax'

    if y is None:
        # Fall back to the model's predictions (ties share the mass) so the
        # true label never leaks into the perturbation.
        row_max = reduce_max(logits, 1, keepdims=True)
        y = tf.stop_gradient(tf.to_float(tf.equal(logits, row_max)))
    y = y / reduce_sum(y, 1, keepdims=True)

    loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # Gradient of the loss with respect to the input.
    (grad,) = tf.gradients(loss, x)
    if clip_grad:
        grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

    # Step along the norm-constrained steepest direction.
    adv_x = x + optimize_linear(grad, eps, ord)

    if not (clip_min is None and clip_max is None):
        # One-sided clipping is not supported.
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if not sanity_checks:
        return adv_x
    with tf.control_dependencies(range_checks):
        return tf.identity(adv_x)
def sparse_l1_descent(x, logits, y=None, eps=1.0, q=99,
                      loss_fn=softmax_cross_entropy_with_logits,
                      clip_min=None, clip_max=None, clip_grad=False,
                      targeted=False, sanity_checks=True):
    """
    TensorFlow implementation of the Sparse L1 Descent Method.

    Only the gradient components whose magnitude reaches the q-th
    percentile (per example) are perturbed; the eps budget is split evenly
    among them.

    :param x: the input placeholder
    :param logits: output of model.get_logits
    :param y: (optional) A placeholder for the true labels. If targeted
              is true, then provide the target label. Otherwise, only
              provide this parameter if you'd like to use true labels when
              crafting adversarial samples. Otherwise, model predictions
              are used as labels to avoid the "label leaking" effect
              (explained in this paper: https://arxiv.org/abs/1611.01236).
              Default is None. Labels should be one-hot-encoded.
    :param eps: the epsilon (input variation parameter)
    :param q: the percentile (0-100) above which gradient values are
              retained. Either a scalar or a vector of same length as the
              input batch dimension. q=100 keeps only the entries tied for
              the largest magnitude.
    :param loss_fn: Loss function that takes (labels, logits) as arguments
                    and returns loss
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components
    :param clip_grad: (optional bool) Ignore gradient components at
                      positions where the input is already at the boundary
                      of the domain, and the update step will get clipped
                      out.
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect.
                     Targeted will instead try to move in the direction of
                     being more like y.
    :param sanity_checks: If True, the input-range assertions are attached
                          to the result as control dependencies.
    :return: a tensor for the adversarial example
    """
    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(
            utils_tf.assert_greater_equal(x, tf.cast(clip_min, x.dtype)))

    if clip_max is not None:
        asserts.append(
            utils_tf.assert_less_equal(x, tf.cast(clip_max, x.dtype)))

    # Make sure the caller has not passed probs by accident
    assert logits.op.type != 'Softmax'

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.cast(tf.equal(logits, preds_max), dtype=tf.float32)
        y = tf.stop_gradient(y)
    y = y / reduce_sum(y, 1, keepdims=True)

    # Compute loss
    loss = loss_fn(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(ys=loss, xs=x)

    if clip_grad:
        grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

    red_ind = list(range(1, len(grad.get_shape())))
    dim = tf.reduce_prod(input_tensor=tf.shape(input=x)[1:])

    # One row of absolute gradient values per example.
    abs_grad = tf.reshape(tf.abs(grad), (-1, dim))

    # if q is a scalar, broadcast it to a vector of same length as the
    # batch dim
    q = tf.cast(tf.broadcast_to(q, tf.shape(input=x)[0:1]), tf.float32)
    k = tf.cast(tf.floor(q / 100 * tf.cast(dim, tf.float32)), tf.int32)
    # floor(q/100 * dim) equals dim when q == 100, which is one past the
    # last valid column of the sorted row; clamp so the lookup below stays
    # in bounds (q == 100 then selects the maximum).
    k = tf.clip_by_value(k, 0, dim - 1)

    # `tf.sort` is much faster than `tf.contrib.distributions.percentile`.
    # For TF <= 1.12, use `tf.nn.top_k` as `tf.sort` is not implemented.
    if LooseVersion(tf.__version__) <= LooseVersion('1.12.0'):
        # `tf.sort` is only available in TF 1.13 onwards
        sorted_grad = -tf.nn.top_k(-abs_grad, k=dim, sorted=True)[0]
    else:
        sorted_grad = tf.sort(abs_grad, axis=-1)

    # Per-example threshold: the k-th smallest absolute gradient value.
    idx = tf.stack((tf.range(tf.shape(input=abs_grad)[0]), k), -1)
    percentiles = tf.gather_nd(sorted_grad, idx)

    # Mask of components at or above the threshold, back in input shape.
    tied_for_max = tf.greater_equal(abs_grad,
                                    tf.expand_dims(percentiles, -1))
    tied_for_max = tf.reshape(tf.cast(tied_for_max, x.dtype),
                              tf.shape(input=grad))
    num_ties = tf.reduce_sum(input_tensor=tied_for_max, axis=red_ind,
                             keepdims=True)

    # Split a unit L1 budget evenly across the retained components.
    optimal_perturbation = tf.sign(grad) * tied_for_max / num_ties

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + utils_tf.mul(eps, optimal_perturbation)

    # If clipping is needed, reset all values outside of
    # [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
def __init__(self, sess, model, batch_size, confidence, targeted,
             learning_rate, binary_search_steps, max_iterations,
             abort_early, initial_const, clip_min, clip_max, num_labels,
             shape):
    """
    Assemble the TF graph of the L2 attack: the perturbed image, its
    logits, the two loss terms, the Adam update and the setup/init ops.

    :param sess: a TF session.
    :param model: a cleverhans.model.Model object.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: Confidence margin of the adversarial examples:
                       larger values give examples that are more strongly
                       classified as adversarial at the cost of a larger
                       l2 distortion.
    :param targeted: If False, examples only need to be misclassified as
                     any wrong class; if True, they are pushed towards a
                     chosen target class.
    :param learning_rate: Learning rate of the attack. Smaller values
                          give better results but converge more slowly.
    :param binary_search_steps: Number of binary-search rounds used to
                                find the tradeoff constant between the
                                perturbation norm and the classification
                                confidence.
    :param max_iterations: Maximum number of iterations. Larger values
                           yield lower distortion; with few iterations a
                           larger learning rate is needed and distortion
                           grows.
    :param abort_early: If true, allow stopping early when gradient
                        descent stops making progress.
    :param initial_const: Starting value of the tradeoff constant. It
                          matters little when binary_search_steps is
                          large; smaller values give lower distortion.
    :param clip_min: (optional float) Minimum input component value.
    :param clip_max: (optional float) Maximum input component value.
    :param num_labels: the number of classes in the model's output.
    :param shape: the shape of the model's input tensor.
    """
    # Plain bookkeeping of the attack configuration.
    self.sess = sess
    self.model = model
    self.batch_size = batch_size
    self.CONFIDENCE = confidence
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.MAX_ITERATIONS = max_iterations
    self.ABORT_EARLY = abort_early
    self.initial_const = initial_const
    self.clip_min = clip_min
    self.clip_max = clip_max

    self.repeat = binary_search_steps >= 10

    # Full batched input shape.
    self.shape = shape = (batch_size,) + tuple(shape)
    label_shape = (batch_size, num_labels)

    # The perturbation (in tanh space) that the optimizer updates.
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # Graph-resident copies of the inputs, so data is sent to TF once.
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.tlab = tf.Variable(np.zeros(label_shape), dtype=tf_dtype,
                            name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype,
                             name='const')

    # Placeholders used to load those variables.
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(tf_dtype, label_shape,
                                      name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size],
                                       name='assign_const')

    def _to_input_range(raw):
        # tanh keeps the value bounded; rescale to [clip_min, clip_max].
        return (tf.tanh(raw) + 1) / 2 * (clip_max - clip_min) + clip_min

    # Candidate adversarial image and its pre-softmax prediction.
    self.newimg = _to_input_range(modifier + self.timg)
    self.output = model.get_logits(self.newimg)

    # Unperturbed image and the squared L2 distance to it, per example.
    self.other = _to_input_range(self.timg)
    non_batch_axes = list(range(1, len(shape)))
    self.l2dist = reduce_sum(tf.square(self.newimg - self.other),
                             non_batch_axes)

    # Logit of the labelled class versus the best of all other classes
    # (the labelled class is pushed down by 10000 to exclude it).
    real = reduce_sum(self.tlab * self.output, 1)
    other = reduce_max(
        (1 - self.tlab) * self.output - self.tlab * 10000, 1)

    zero = ZERO()
    # Targeted: make the labelled class win; untargeted: make it lose.
    margin = other - real if self.TARGETED else real - other
    loss1 = tf.maximum(zero, margin + self.CONFIDENCE)

    # Total objective: distortion plus const-weighted classification term.
    self.loss2 = reduce_sum(self.l2dist)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Create the Adam update and record the variables it introduces.
    preexisting = {v.name for v in tf.global_variables()}
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[modifier])
    new_vars = [v for v in tf.global_variables()
                if v.name not in preexisting]

    # Ops that load a fresh batch into the graph variables.
    self.setup = [
        self.timg.assign(self.assign_timg),
        self.tlab.assign(self.assign_tlab),
        self.const.assign(self.assign_const),
    ]

    # Resets the perturbation and the optimizer state.
    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
def __init__(self, sess, model, batch_size, confidence, targeted,
             learning_rate, const_a_min, const_a_max, max_iterations,
             clip_min, clip_max, num_labels, shape):
    """
    Assemble the TF graph of the attack: the perturbed image, its logits,
    the two loss terms, a gradient-descent update with a decaying learning
    rate, and the setup/init ops.

    :param sess: a TF session.
    :param model: a cleverhans.model.Model object.
    :param batch_size: Number of attacks to run simultaneously.
    :param confidence: Confidence margin of the adversarial examples:
                       larger values give examples that are more strongly
                       classified as adversarial at the cost of a larger
                       l2 distortion.
    :param targeted: If False, examples only need to be misclassified as
                     any wrong class; if True, they are pushed towards a
                     chosen target class.
    :param learning_rate: Learning rate of the attack. Smaller values
                          give better results but converge more slowly.
    :param const_a_min: The constant value for parameter a (min).
    :param const_a_max: The constant value for parameter a (max).
    :param max_iterations: Maximum number of iterations. Larger values
                           yield lower distortion; with few iterations a
                           larger learning rate is needed and distortion
                           grows.
    :param clip_min: (optional float) Minimum input component value.
    :param clip_max: (optional float) Maximum input component value.
    :param num_labels: the number of classes in the model's output.
    :param shape: the shape of the model's input tensor.
    """
    # Plain bookkeeping of the attack configuration.
    self.sess = sess
    self.model = model
    self.batch_size = batch_size
    self.CONFIDENCE = confidence
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.CONST_A_MIN = const_a_min
    self.CONST_A_MAX = const_a_max
    self.MAX_ITERATIONS = max_iterations
    self.clip_min = clip_min
    self.clip_max = clip_max

    # Full batched input shape.
    self.shape = shape = (batch_size,) + tuple(shape)
    label_shape = (batch_size, num_labels)

    # The perturbation (in tanh space) that the optimizer updates.
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # Graph-resident copies of the inputs, so data is sent to TF once.
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.tlab = tf.Variable(np.zeros(label_shape), dtype=tf_dtype,
                            name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype,
                             name='const')

    # Placeholders used to load those variables.
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(tf_dtype, label_shape,
                                      name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size],
                                       name='assign_const')

    def _to_input_range(raw):
        # tanh keeps the value bounded; rescale to [clip_min, clip_max].
        return (tf.tanh(raw) + 1) / 2 * (clip_max - clip_min) + clip_min

    # Candidate adversarial image and its pre-softmax prediction.
    self.newimg = _to_input_range(modifier + self.timg)
    self.output = model.get_logits(self.newimg)

    # Unperturbed image and the squared L2 distance to it, per example.
    self.other = _to_input_range(self.timg)
    non_batch_axes = list(range(1, len(shape)))
    self.l2dist = reduce_sum(tf.square(self.newimg - self.other),
                             non_batch_axes)

    # Logit of the labelled class versus the best of all other classes
    # (the labelled class is pushed down by 10000 to exclude it).
    real = reduce_sum(self.tlab * self.output, 1)
    other = reduce_max(
        (1 - self.tlab) * self.output - self.tlab * 10000, 1)

    zero = ZERO()
    # Targeted: make the labelled class win; untargeted: make it lose.
    margin = other - real if self.TARGETED else real - other
    loss1 = tf.maximum(zero, margin + self.CONFIDENCE)

    # Total objective: distortion plus const-weighted classification term.
    self.loss2 = reduce_sum(self.l2dist)
    self.loss1 = reduce_sum(self.const * loss1)
    self.loss = self.loss1 + self.loss2

    # Everything created from here on is new optimizer state that must be
    # (re)initialized together with the perturbation.
    preexisting = {v.name for v in tf.global_variables()}

    # Step counter starting at 99; minimize() increments it on every run
    # because it is passed as global_step below.
    step_counter = tf.Variable(99, trainable=False)
    decayed_lr = tf.train.inverse_time_decay(
        learning_rate=self.LEARNING_RATE * 100,
        global_step=step_counter * batch_size,
        decay_steps=1.0,
        decay_rate=1.0)
    # Momentum optimizer with momentum 0.0 and no Nesterov term.
    sgd = tf.train.MomentumOptimizer(learning_rate=decayed_lr,
                                     momentum=0.0,
                                     use_nesterov=False)
    self.train = sgd.minimize(self.loss, var_list=[modifier],
                              global_step=step_counter)

    new_vars = [v for v in tf.global_variables()
                if v.name not in preexisting]

    # Ops that load a fresh batch into the graph variables.
    self.setup = [
        self.timg.assign(self.assign_timg),
        self.tlab.assign(self.assign_tlab),
        self.const.assign(self.assign_const),
    ]

    # Resets the perturbation, the step counter and optimizer state.
    self.init = tf.variables_initializer(var_list=[modifier] + new_vars)