def loop_body(i, perturbation, flat_optim_state):
    """Run one attack iteration: optimizer step, then projection.

    :param i: loop counter carried between iterations.
    :param perturbation: current perturbation that is added to `input_image`.
    :param flat_optim_state: flattened optimizer state; it is re-packed here
        using the structure of `init_optim_state`.
    :return: tuple `(next_counter, projected_perturbation, flat_state)` where
        `flat_state` is the flattened, updated optimizer state.
    """
    packed_state = nest.pack_sequence_as(
        structure=init_optim_state, flat_sequence=flat_optim_state)

    def perturbed_loss(delta):
        # Loss of the model on the input image shifted by `delta`.
        return loss_fn(input_image + delta, label)

    candidates, updated_state = optimizer.minimize(
        perturbed_loss, [perturbation], packed_state)
    projected_perturbation = project_perturbation(
        candidates[0], epsilon, input_image,
        clip_min=clip_min, clip_max=clip_max)

    # A threshold of 0. is legitimate but falsy, so test against None
    # explicitly instead of relying on truthiness.
    early_stop = early_stop_loss_threshold is not None

    # Only build the loss graph when debugging or early stopping needs it.
    if is_debug or early_stop:
        # This evaluation is not redundant with the optimizer step: SPSA
        # evaluates the loss at randomly perturbed points, not at the
        # current point.
        loss = reduce_mean(perturbed_loss(projected_perturbation), axis=0)

        if is_debug:
            with tf.device("/cpu:0"):
                loss = tf.Print(loss, [loss], "Total batch loss")

        if early_stop:
            # Once the loss is below the threshold, jump the counter to
            # num_steps (presumably this ends the enclosing loop -- the
            # loop condition is not visible here).
            below_threshold = tf.less(loss, early_stop_loss_threshold)
            i = tf.cond(below_threshold,
                        lambda: float(num_steps),
                        lambda: i)

    return i + 1, projected_perturbation, nest.flatten(updated_state)
def _compute_gradients(self, loss_fn, x, unused_optim_state):
    """Differentiate the batch-averaged loss with respect to `x[0]`.

    Args:
      loss_fn: a callable that takes the tensor `x[0]` and returns loss
        values; these are averaged over axis 0 before differentiation.
      x: A list containing exactly one Tensor (a batch of images). This is
        analogous to the `var_list` argument in standard TF Optimizer.
      unused_optim_state: Optimizer state; not used by this method.

    Returns:
      The result of `tf.gradients` for the averaged loss with respect to
      `x[0]`.

    Raises:
      AssertionError: if `x` is not a list of length one.
    """
    # The rest of the method relies on `x` being a one-element list.
    assert len(x) == 1 and isinstance(x, list), \
        "x should be a list and contain only one image tensor"
    images = x[0]
    mean_loss = reduce_mean(loss_fn(images), axis=0)
    return tf.gradients(mean_loss, images)
def fprop(self, x, **kwargs):
    """Standardize `x` per example over axes 1, 2 and 3.

    Each example has its mean subtracted and is divided by its standard
    deviation, where the divisor is floored at `1 / sqrt(N)` and `N` is the
    product of the non-batch dimensions of `x`.

    :param x: input tensor; axes 1-3 are reduced, so it is presumably a
        rank-4 image batch (confirm against callers).
    :param kwargs: accepted for interface compatibility; unused here.
    :return: the standardized tensor.
    """
    reduce_axes = [1, 2, 3]
    mean = reduce_mean(x, axis=reduce_axes, keepdims=True)
    mean_of_squares = reduce_mean(tf.square(x), axis=reduce_axes,
                                  keepdims=True)

    # Var[x] = E[x^2] - E[x]^2; relu clamps any negative result to zero
    # (presumably to absorb floating-point round-off before the sqrt).
    variance = tf.nn.relu(mean_of_squares - tf.square(mean))
    stddev = tf.sqrt(variance)

    # Lower bound for the divisor so constant images do not divide by zero.
    num_pixels = reduce_prod(tf.shape(x)[1:])
    min_stddev = tf.rsqrt(tf.to_float(num_pixels))
    scale = tf.maximum(stddev, min_stddev)

    centered = tf.subtract(x, mean)
    return tf.div(centered, scale)
def model_loss(y, model, mean=True):
    """
    Define loss of TF graph (deprecated).

    If `model` is the output of a Softmax op, the cross-entropy is computed
    on that op's input (the logits) rather than on the probabilities.

    :param y: correct labels
    :param model: output of the model
    :param mean: boolean indicating whether should return mean of loss
                 or vector of losses for each input of the batch
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn('This function is deprecated.', DeprecationWarning)

    op = model.op
    if op.type == "Softmax":
        # Softmax has a single input: the pre-softmax logits.
        logits, = op.inputs
    else:
        logits = model

    try:
        # Labels are treated as constants: no gradient flows into `y`.
        y = tf.stop_gradient(y)
        out = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                         labels=y)
    except AttributeError:
        # The installed TensorFlow lacks the `_v2` op; warn and fall back.
        # The trailing space after "CleverHans" keeps the sentence from
        # running together as "CleverHansin the future.".
        warning = ("Running on tensorflow version " +
                   LooseVersion(tf.__version__).vstring +
                   ". This version will not be supported by CleverHans "
                   "in the future.")
        warnings.warn(warning)
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=y)

    if mean:
        out = reduce_mean(out)
    return out
def body(i, ax, m):
    """Perform one momentum-accumulated gradient step on `ax`.

    :param i: loop counter.
    :param ax: current adversarial example.
    :param m: accumulated (momentum) gradient from previous iterations.
    :return: tuple `(i + 1, updated ax, updated m)`.
    :raises NotImplementedError: if `self.ord == 1`.
    """
    # Fail fast: reject the unsupported norm before building any graph ops.
    if self.ord == 1:
        raise NotImplementedError(
            "This attack hasn't been tested for ord=1. "
            "It's not clear that FGM makes a good inner "
            "loop step for iterative optimization since "
            "it updates just one coordinate at a time.")

    logits = self.model.get_logits(ax)
    loss = self.loss_func(labels=y, logits=logits)
    if targeted:
        # A targeted attack minimizes the loss toward the target label.
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, ax)

    # Normalize current gradient (by its mean absolute value over all
    # non-batch axes) and add it to the accumulated gradient
    red_ind = list(xrange(1, len(grad.get_shape())))
    avoid_zero_div = tf.cast(1e-12, grad.dtype)
    grad = grad / tf.maximum(
        avoid_zero_div,
        reduce_mean(tf.abs(grad), red_ind, keepdims=True))
    m = self.decay_factor * m + grad

    optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

    # Update and clip adversarial example in current iteration
    ax = ax + optimal_perturbation
    ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

    if self.clip_min is not None and self.clip_max is not None:
        ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

    # Do not backpropagate through earlier iterations.
    ax = tf.stop_gradient(ax)

    return i + 1, ax, m
def model_loss(y, model, mean=True):
    """
    Define loss of TF graph (deprecated).

    If `model` is the output of a Softmax op, the cross-entropy is computed
    on that op's input (the logits) rather than on the probabilities.

    :param y: correct labels
    :param model: output of the model
    :param mean: boolean indicating whether should return mean of loss
                 or vector of losses for each input of the batch
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn('This function is deprecated.', DeprecationWarning)

    op = model.op
    if op.type == "Softmax":
        # Softmax has a single input: the pre-softmax logits.
        logits, = op.inputs
    else:
        logits = model

    try:
        # Labels are treated as constants: no gradient flows into `y`.
        y = tf.stop_gradient(y)
        out = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=y)
    except AttributeError:
        # The installed TensorFlow lacks the `_v2` op; warn and fall back.
        # The trailing space after "CleverHans" keeps the sentence from
        # running together as "CleverHansin the future.".
        warning = ("Running on tensorflow version " +
                   LooseVersion(tf.__version__).vstring +
                   ". This version will not be supported by CleverHans "
                   "in the future.")
        warnings.warn(warning)
        out = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=y)

    if mean:
        out = reduce_mean(out)
    return out
def body(i, grad_array):
    """Compute one sampled gradient estimate and store it at index `i`.

    Perturbations from `self._get_delta` are evaluated in +/- pairs, the
    losses are weighted by their perturbations, averaged over the sample
    axis and divided by `delta`.

    :param i: loop counter, also the write index into `grad_array`.
    :param grad_array: array-like accumulator exposing `write(index, value)`
        (presumably a `tf.TensorArray`).
    :return: tuple `(i + 1, grad_array with the new estimate written)`.
    """
    delta = self._delta
    positive_delta = self._get_delta(x, delta)
    # Pair every perturbation with its negation along the sample axis.
    delta_x = tf.concat([positive_delta, -positive_delta], axis=0)

    # One loss per sample, reshaped so it broadcasts against `delta_x`.
    broadcast_shape = [2 * self._num_samples] + [1] * (len(x_shape) - 1)
    loss_vals = tf.reshape(loss_fn(x + delta_x), broadcast_shape)

    avg_grad = reduce_mean(loss_vals * delta_x, axis=0) / delta
    avg_grad = tf.expand_dims(avg_grad, axis=0)
    return i + 1, grad_array.write(i, avg_grad)
def kl_with_logits(p_logits, q_logits, scope=None,
                   loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES):
    """Compute the batch-mean KL-divergence KL(p || q) from logits.

    :param p_logits: logits defining the distribution `p`.
    :param q_logits: logits defining the distribution `q`.
    :param scope: optional name scope; "kl_divergence" is the default name.
    :param loss_collection: collection the loss is registered in through
        `tf.losses.add_loss`.
    :return: the KL-divergence, summed over axis 1 and averaged over the
        remaining elements.
    """
    with tf.name_scope(scope, "kl_divergence") as name:
        p = tf.nn.softmax(p_logits)
        log_p = tf.nn.log_softmax(p_logits)
        log_q = tf.nn.log_softmax(q_logits)
        # sum_k p_k * (log p_k - log q_k) for every example.
        per_example_kl = reduce_sum(p * (log_p - log_q), axis=1)
        loss = reduce_mean(per_example_kl, name=name)
        tf.losses.add_loss(loss, loss_collection)
        return loss
def model_loss(y, model, mean=True):
    """
    Define loss of TF graph (deprecated).

    When `model` is produced by a Softmax op, the loss is computed on that
    op's single input (the logits) instead of on `model` itself.

    :param y: correct labels
    :param model: output of the model
    :param mean: boolean indicating whether should return mean of loss
                 or vector of losses for each input of the batch
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn('This function is deprecated.')

    producer = model.op
    if producer.type == "Softmax":
        # Unpacking enforces that the op has exactly one input.
        (logits,) = producer.inputs
    else:
        logits = model

    per_sample = softmax_cross_entropy_with_logits(logits=logits, labels=y)
    return reduce_mean(per_sample) if mean else per_sample
def model_loss(y, model, mean=True):
    """
    Define loss of TF graph (deprecated).

    When `model` is produced by a Softmax op, the loss is computed on that
    op's single input (the logits) instead of on `model` itself.

    :param y: correct labels
    :param model: output of the model
    :param mean: boolean indicating whether should return mean of loss
                 or vector of losses for each input of the batch
    :return: return mean of loss if True, otherwise return vector with per
             sample loss
    """
    warnings.warn("This function is deprecated and will be removed on or after"
                  " 2019-04-05. Switch to cleverhans.train.train.")

    producer = model.op
    if producer.type == "Softmax":
        # Unpacking enforces that the op has exactly one input.
        (logits,) = producer.inputs
    else:
        logits = model

    per_sample = softmax_cross_entropy_with_logits(logits=logits, labels=y)
    if not mean:
        return per_sample
    return reduce_mean(per_sample)
def generate(self, x, **kwargs):
    """Build the iterative attack graph and save adversarial images.

    Each iteration takes the gradient of the cross-entropy loss at the
    current adversarial input, normalizes it by its mean absolute value,
    feeds it into first/second moment accumulators (`self.betha1`,
    `self.betha2`), and applies the resulting step via `optimize_linear`.
    The result is clipped with `clip_eta` and, when both bounds are set,
    to `[clip_min, clip_max]`. The graph is then run for `self.nb_batches`
    batches and every image is written to `self.adv_dir`.

    NOTE(review): the nesting was reconstructed from a flattened source.
    The session/saving section is placed inside the iteration loop because
    it uses the loop variable `i` in the file name -- confirm against the
    original layout.

    :param x: input tensor to attack.
    :param kwargs: forwarded to `parse_params` and `get_or_guess_labels`.
    :raises NotImplementedError: if `self.ord == 1` when an iteration runs.
    """
    # Call parse_params outside the assert: a call placed inside an
    # `assert` expression is skipped entirely under `python -O`, which
    # would leave the attack parameters unset.
    params_ok = self.parse_params(**kwargs)
    assert params_ok

    # Range checks on the input, only attached when sanity_checks is on.
    asserts = []
    if self.clip_min is not None:
        asserts.append(utils_tf.assert_greater_equal(
            x, tf.cast(self.clip_min, x.dtype)))
    if self.clip_max is not None:
        asserts.append(utils_tf.assert_less_equal(
            x, tf.cast(self.clip_max, x.dtype)))

    # First and second moment accumulators for the gradient.
    m_cache = tf.zeros_like(x)
    v_cache = tf.zeros_like(x)
    adv_x = x

    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    # Normalize each label row so that it sums to one.
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def save_batch(directory, images, labels, iteration, batch_idx):
        # One PNG per image; the name encodes the index within the batch,
        # the batch, the iteration and the arg-max label.
        for idx, (image, label) in enumerate(zip(images, labels)):
            filename = "id{}_b{}_it{}_l{}.png".format(
                idx, batch_idx, iteration, np.argmax(label))
            save_image_np(join(directory, filename), image)

    for i in range(self.nb_iter):
        self.logger.debug("Starting #{} iteration".format(i + 1))

        # Fail fast: reject the unsupported norm before building any ops
        # for this iteration.
        if self.ord == 1:
            raise NotImplementedError(
                "This attack hasn't been tested for ord=1. "
                "It's not clear that FGM makes a good inner "
                "loop step for iterative optimization since "
                "it updates just one coordinate at a time.")

        logits = self.model.get_logits(adv_x)
        loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
        if targeted:
            # A targeted attack minimizes the loss toward the target.
            loss = -loss

        grad, = tf.gradients(loss, adv_x)

        # Normalize the gradient by its mean absolute value per example.
        red_ind = list(range(1, len(grad.get_shape())))
        avoid_zero_div = tf.cast(1e-8, grad.dtype)
        grad = grad / tf.maximum(
            avoid_zero_div,
            reduce_mean(tf.abs(grad), red_ind, keepdims=True))

        # Exponential moving averages of the gradient and its square.
        m_cache = self.betha1 * m_cache + (1 - self.betha1) * grad
        v_cache = self.betha2 * v_cache + (1 - self.betha2) * tf.square(grad)
        update = tf.divide(m_cache, tf.sqrt(v_cache + avoid_zero_div))

        optimal_perturbation = optimize_linear(update, self.eps_iter,
                                               self.ord)

        adv_x = adv_x + optimal_perturbation
        adv_x = x + utils_tf.clip_eta(adv_x - x, self.ord, self.eps)

        if self.clip_min is not None and self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                           self.clip_max)

        # Do not backpropagate through earlier iterations.
        adv_x = tf.stop_gradient(adv_x)

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        with self.sess.as_default():
            self.sess.run(self.init_op)
            for batch in range(self.nb_batches):
                adv_x_np, y_np = self.sess.run([adv_x, y])
                self.logger.debug(
                    "Saving attacked batch #{}".format(batch + 1))
                save_batch(self.adv_dir, adv_x_np, y_np, i, batch)
def __init__(self, sess, model, batch_size, confidence, targeted,
             learning_rate, binary_search_steps, max_iterations,
             abort_early, initial_const, clip_min, clip_max, num_labels,
             shape):
    """Store the attack settings and build the optimization graph.

    The graph optimizes a `modifier` variable so that
    `tanh(modifier + timg)`, rescaled to `[clip_min, clip_max]`, minimizes
    `const * margin_loss + squared L2 distance` using Adam.

    :param sess: TF session, stored on the instance.
    :param model: object exposing `get_output(tensor)`.
    :param batch_size: number of examples per batch.
    :param confidence: margin added inside the hinge loss.
    :param targeted: if True the loss favours the class marked in `tlab`,
        otherwise it penalizes that class.
    :param learning_rate: learning rate of the Adam optimizer.
    :param binary_search_steps: stored; also sets `self.repeat` when >= 10.
    :param max_iterations: stored for later use.
    :param abort_early: stored for later use.
    :param initial_const: stored for later use.
    :param clip_min: lower bound of the generated image values.
    :param clip_max: upper bound of the generated image values.
    :param num_labels: size of the label dimension.
    :param shape: per-example input shape (without the batch dimension).
    """
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max

    self.repeat = binary_search_steps >= 10

    # Full batch shape: batch dimension followed by the example shape.
    shape = tuple([batch_size] + list(shape))
    self.shape = shape

    # The only variable the optimizer is allowed to change.
    modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

    # Inputs are held in variables so they can be set once via assign ops.
    self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
    self.tlab = tf.Variable(np.zeros((batch_size, num_labels)),
                            dtype=tf_dtype, name='tlab')
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf_dtype,
                             name='const')

    # Placeholders that feed the assign ops collected in `self.setup`.
    self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(tf_dtype, (batch_size, num_labels),
                                      name='assign_tlab')
    self.assign_const = tf.placeholder(tf_dtype, [batch_size],
                                       name='assign_const')

    # Candidate image: tanh keeps it bounded, then rescale from [0, 1] to
    # [clip_min, clip_max].
    self.newimg = ((tf.tanh(modifier + self.timg) + 1) / 2) \
        * (clip_max - clip_min) + clip_min

    # The unmodified input mapped through the same transform.
    original_img = (tf.tanh(self.timg) + 1) / \
        2 * (clip_max - clip_min) + clip_min

    # Squared L2 distance, summed over every non-batch axis.
    non_batch_axes = list(range(1, len(shape)))
    self.l2dist = reduce_sum(tf.square(self.newimg - original_img),
                             non_batch_axes)

    # Model output for the candidate image (expected to be pre-softmax).
    output = model.get_output(self.newimg)
    self.output = output

    # Score of the labelled class versus the best score among the others;
    # the large negative offset removes the labelled class from the max.
    real = reduce_sum((self.tlab) * output, 1)
    best_other = reduce_max(
        (1 - self.tlab) * output - self.tlab * 10000, 1)

    if self.TARGETED:
        # Targeted: push the labelled class above all others.
        loss1 = tf.maximum(ZERO(), best_other - real + self.CONFIDENCE)
    else:
        # Untargeted: push the labelled class below some other class.
        loss1 = tf.maximum(ZERO(), real - best_other + self.CONFIDENCE)

    # Combine the weighted margin loss with the distance term.
    self.loss1 = self.const * loss1
    self.loss = reduce_mean(self.loss1 + self.l2dist)

    # Snapshot variable names so the variables Adam creates can be found.
    names_before = set(v.name for v in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[modifier])
    created_vars = [v for v in tf.global_variables()
                    if v.name not in names_before]

    # Ops that load a new batch of images, labels and constants.
    self.setup = [
        self.timg.assign(self.assign_timg),
        self.tlab.assign(self.assign_tlab),
        self.const.assign(self.assign_const),
    ]

    # Resets the modifier together with the optimizer's own variables.
    self.init = tf.variables_initializer(
        var_list=[modifier] + created_vars)