def __init__(self, model, batch_size, loss, goal, distance_metric, session):
    '''
    Build the TensorFlow graph for a single-step FGSM attack.

    :param model: The model to attack. A `realsafe.model.Classifier` instance.
    :param batch_size: Batch size for the `batch_attack()` method.
    :param loss: The loss function to optimize. A `realsafe.loss.Loss` instance.
    :param goal: Adversarial goal. Supported values are 't', 'tm', and 'ut'.
    :param distance_metric: Adversarial distance metric. Supported values are 'l_2' and 'l_inf'.
    :param session: The `tf.Session` to run the attack in. The `model` should be loaded into this session.
    :raises NotImplementedError: If `goal` or `distance_metric` is not one of the supported values.
    '''
    self.model = model
    self.batch_size = batch_size
    self._session = session
    self.loss = loss
    self.goal = goal
    self.distance_metric = distance_metric

    # Placeholders through which batch_attack() feeds its inputs and labels.
    self.xs_ph = get_xs_ph(model, batch_size)
    self.ys_ph = get_ys_ph(model, batch_size)

    # Per-example perturbation magnitude: fed through `eps_ph` and copied into
    # `eps_var` by running `config_eps_step` (defined at the end of this method).
    per_example_shape = (batch_size,)
    self.eps_ph = tf.placeholder(model.x_dtype, per_example_shape)
    self.eps_var = tf.Variable(tf.zeros(per_example_shape, dtype=model.x_dtype))

    # Gradient of the loss with respect to the input batch.
    loss_grad = tf.gradients(self.loss(self.xs_ph, self.ys_ph), self.xs_ph)[0]
    if goal in ('t', 'tm'):
        # For these goals the step moves against the loss gradient.
        loss_grad = -loss_grad
    elif goal != 'ut':
        raise NotImplementedError

    # One row per example, so the (batch_size, 1) magnitude broadcasts over it.
    flat_grad = tf.reshape(loss_grad, (batch_size, -1))

    # Step direction for the chosen distance metric.
    if distance_metric == 'l_2':
        # presumably scales each row to unit l_2 norm -- confirm against get_unit()
        direction = get_unit(flat_grad)
    elif distance_metric == 'l_inf':
        direction = tf.sign(flat_grad)
    else:
        raise NotImplementedError

    step = tf.expand_dims(self.eps_var, 1) * direction
    step = tf.reshape(step, (batch_size, *model.x_shape))

    # Keep the perturbed input inside the model's [x_min, x_max] range.
    self.xs_adv = tf.clip_by_value(self.xs_ph + step, model.x_min, model.x_max)

    self.config_eps_step = self.eps_var.assign(self.eps_ph)
def __init__(self, model, batch_size, loss, goal, distance_metric, session, iteration_callback=None):
    '''
    Build the TensorFlow graph for the iterative BIM attack.

    :param model: The model to attack. A ``realsafe.model.Classifier`` instance.
    :param batch_size: Batch size for the ``batch_attack()`` method.
    :param loss: The loss function to optimize. A ``realsafe.loss.Loss`` instance.
    :param goal: Adversarial goal. Supported values are ``'t'``, ``'tm'``, and ``'ut'``.
    :param distance_metric: Adversarial distance metric. Supported values are ``'l_2'`` and ``'l_inf'``.
    :param session: The ``tf.Session`` to run the attack in. The ``model`` should be loaded into this session.
    :param iteration_callback: Optional function taking an ``xs`` ``tf.Tensor`` (the original examples) and an
        ``xs_adv`` ``tf.Tensor`` (the adversarial examples for ``xs``). It is invoked once, while the graph is
        being built, and whatever it returns is stored on ``self.iteration_callback``; ``batch_attack()`` is
        expected to evaluate that after each iteration and yield the result to the caller. Defaults to
        ``None``, in which case ``self.iteration_callback`` is ``None``.
    :raises NotImplementedError: If ``goal`` or ``distance_metric`` is not one of the supported values.
    '''
    self.model = model
    self.batch_size = batch_size
    self._session = session
    self.loss = loss
    self.goal = goal
    self.distance_metric = distance_metric

    # Placeholders through which batch_attack() feeds its inputs and labels.
    self.xs_ph = get_xs_ph(model, batch_size)
    self.ys_ph = get_ys_ph(model, batch_size)

    x_dtype = model.x_dtype
    per_example_shape = (batch_size,)
    # Shape the model consumes, and the flattened (batch_size, D) shape used for the update arithmetic.
    model_input_shape = (batch_size, *model.x_shape)
    flat_shape = (batch_size, np.prod(model.x_shape))

    # Inputs and labels are held in variables to reduce memory copies between TensorFlow and Python.
    # Original examples, flattened to (batch_size, D).
    self.xs_var = tf.Variable(tf.zeros(shape=flat_shape, dtype=x_dtype))
    # Labels.
    self.ys_var = tf.Variable(tf.zeros(shape=per_example_shape, dtype=model.y_dtype))
    # Current (hopefully) adversarial examples, flattened to (batch_size, D).
    self.xs_adv_var = tf.Variable(tf.zeros(shape=flat_shape, dtype=x_dtype))

    # Per-example perturbation magnitude.
    self.eps_ph = tf.placeholder(x_dtype, per_example_shape)
    self.eps_var = tf.Variable(tf.zeros(per_example_shape, dtype=x_dtype))
    # Per-example step size.
    self.alpha_ph = tf.placeholder(x_dtype, per_example_shape)
    self.alpha_var = tf.Variable(tf.zeros(per_example_shape, dtype=x_dtype))

    # Column vectors of shape (batch_size, 1) so they broadcast over the flattened examples.
    eps_col = tf.expand_dims(self.eps_var, 1)
    alpha_col = tf.expand_dims(self.alpha_var, 1)

    # Gradient of the loss with respect to the flattened adversarial examples: shape (batch_size, D).
    self.xs_adv_model = tf.reshape(self.xs_adv_var, model_input_shape)
    # NOTE: from here on `self.loss` is the loss tensor, no longer the loss callable stored above.
    self.loss = loss(self.xs_adv_model, self.ys_var)
    gradient = tf.gradients(self.loss, self.xs_adv_var)[0]
    if goal in ('t', 'tm'):
        # For these goals the step moves against the loss gradient.
        gradient = -gradient
    elif goal != 'ut':
        raise NotImplementedError

    # One update step, projected back so the noise stays within the `eps` magnitude.
    if distance_metric == 'l_2':
        # presumably scales each row to unit l_2 norm -- confirm against get_unit()
        direction = get_unit(gradient)
        perturbation = self.xs_adv_var - self.xs_var + alpha_col * direction
        # Clip the accumulated noise by its maximum l_2 magnitude.
        candidate = self.xs_var + tf.clip_by_norm(perturbation, eps_col, axes=[1])
    elif distance_metric == 'l_inf':
        lower = self.xs_var - eps_col
        upper = self.xs_var + eps_col
        direction = tf.sign(gradient)
        # Clip the stepped example by the maximum l_inf magnitude around the original.
        candidate = tf.clip_by_value(self.xs_adv_var + alpha_col * direction, lower, upper)
    else:
        raise NotImplementedError

    # Keep the result inside the model's [x_min, x_max] range.
    candidate = tf.clip_by_value(candidate, model.x_min, model.x_max)

    # Ops that advance the attack by one step and load its configuration and inputs.
    self.update_xs_adv_step = self.xs_adv_var.assign(candidate)
    self.config_eps_step = self.eps_var.assign(self.eps_ph)
    self.config_alpha_step = self.alpha_var.assign(self.alpha_ph)
    # Both the original and the adversarial variables start from the fed inputs.
    self.setup_xs = [
        self.xs_var.assign(tf.reshape(self.xs_ph, flat_shape)),
        self.xs_adv_var.assign(tf.reshape(self.xs_ph, flat_shape)),
    ]
    self.setup_ys = self.ys_var.assign(self.ys_ph)

    # Not set during construction; presumably configured later -- TODO confirm against the rest of the class.
    self.iteration = None

    self.iteration_callback = None
    if iteration_callback is not None:
        xs_model = tf.reshape(self.xs_var, model_input_shape)
        self.iteration_callback = iteration_callback(xs_model, self.xs_adv_model)