def __init__(self, model, batch_size, width_min=None, prob=0.9):
    """Build the attack graph with an input-diversity transformation.

    :param model: differentiable target classifier; its inputs must be
        3-D (height, width, channels).
    :param batch_size: number of examples attacked at once.
    :param width_min: lower bound of the random resize width used by
        ``_input_diversity``; defaults to half of the input width.
    :param prob: probability of applying the diversity transform.
    """
    assert isinstance(model, ClassifierDifferentiable)
    assert len(model.x_shape) == 3
    Attack.__init__(self, model, batch_size)
    self.width_max = self.model.x_shape[1]
    # BUG FIX: the original truthiness test (`if width_min`) silently
    # replaced an explicit width_min=0 with the default; compare to None.
    self.width_min = width_min if width_min is not None else self.width_max // 2
    self.prob = prob
    # Flattened input dimension: images are handled as flat vectors below.
    self.x_dim = 1
    for d in self.model.x_shape:
        self.x_dim *= d
    # NOTE: self.batch_size is already stored by Attack.__init__ (the other
    # attacks in this file read it without re-assigning), so the redundant
    # `self.batch_size = batch_size` was dropped.
    self.xs_ph = tf.placeholder(model.x_dtype, (batch_size, self.x_dim))
    self.ys_ph = tf.placeholder(model.y_dtype, (batch_size, ))
    self.xs_var = tf.Variable(tf.zeros_like(self.xs_ph))
    self.xs_adv = tf.Variable(tf.zeros_like(self.xs_ph))
    self.ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
    # Accumulated (momentum) gradient, reset on each setup.
    self.g_var = tf.Variable(tf.zeros_like(self.xs_var))
    # Apply the random input-diversity transform before the forward pass.
    xs_adv_di = self._input_diversity(
        tf.reshape(self.xs_adv, (self.batch_size, *self.model.x_shape)))
    logits, _ = self.model.logits_and_labels(xs_adv_di)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.ys_var, logits=logits)
    self.grad = tf.gradients(loss, self.xs_adv)[0]
    # Per-example L1 norm, kept 2-D for broadcasting against the gradient.
    self.grad_l1 = tf.reduce_sum(tf.abs(self.grad), axis=1, keepdims=True)
    self.setup = [
        self.xs_var.assign(self.xs_ph),
        self.xs_adv.assign(self.xs_ph),
        self.ys_var.assign(self.ys_ph),
        tf.variables_initializer([self.g_var]),
    ]
def __init__(self, model, batch_size):
    """Set up the label-prediction graph for a single-example attack.

    :param model: the target classifier (a ``Classifier``).
    :param batch_size: only a batch size of 1 is supported.
    :raises NotImplementedError: if ``batch_size`` is not 1.
    """
    assert isinstance(model, Classifier)
    if batch_size != 1:
        raise NotImplementedError
    Attack.__init__(self, model=model, batch_size=batch_size)
    # The placeholder accepts any leading batch dimension; only the model's
    # predicted labels are needed by this attack.
    self.xs_ph = tf.placeholder(model.x_dtype, (None, *self.model.x_shape))
    self.labels_pred = self.model.labels(self.xs_ph)
def __init__(self, model, batch_size):
    """Build the NES attack graph using the C&W margin logit loss.

    :param model: the target classifier (a ``ClassifierWithLogits``).
    :param batch_size: only a batch size of 1 is supported.
    :raises NotImplementedError: if ``batch_size`` is not 1.
    """
    assert isinstance(model, ClassifierWithLogits)
    if batch_size != 1:
        raise NotImplementedError
    Attack.__init__(self, model, batch_size)
    self.xs_ph = tf.placeholder(model.x_dtype, (None, *self.model.x_shape))
    self.ys_ph = tf.placeholder(model.y_dtype, (None, ))
    logits, self.labels_pred = self.model.logits_and_labels(self.xs_ph)
    # NES uses the margin logit loss proposed in C&W: the best non-label
    # logit minus the label's logit.
    onehot = tf.one_hot(self.ys_ph, self.model.n_class)
    target_logit = tf.reduce_sum(onehot * logits, axis=-1)
    # Push the label class far down before taking the max so it cannot win.
    best_other_logit = tf.reduce_max(logits - onehot * 99999, axis=-1)
    self.loss = best_other_logit - target_logit
def __init__(self, model, batch_size, overshot=0.02):
    """Build a DeepFool-style attack graph.

    :param model: differentiable target classifier.
    :param batch_size: number of examples attacked at once.
    :param overshot: overshoot factor applied to each DeepFool step.
    """
    Attack.__init__(self, model=model, batch_size=batch_size)
    assert isinstance(self.model, ClassifierDifferentiable)
    self.batch_size = batch_size
    self.overshot = overshot
    # Flattened input dimension; images are treated as flat vectors here.
    x_dim = 1
    for x in self.model.x_shape:
        x_dim *= x
    self.x_dim = x_dim
    self.xs_ph = tf.placeholder(self.model.x_dtype, shape=(batch_size, x_dim))
    self.ys_ph = tf.placeholder(self.model.y_dtype, shape=(batch_size, ))
    self.xs_var = tf.Variable(tf.zeros_like(self.xs_ph))
    self.ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
    self.xs_adv = tf.Variable(tf.zeros_like(self.xs_ph))
    logits, self.labels = self.model.logits_and_labels(
        xs_ph=tf.reshape(self.xs_adv, (batch_size, ) + self.model.x_shape))
    # (batch_index, original_label) index pairs for gather_nd below.
    k0s = tf.stack((tf.range(self.batch_size), self.ys_var), axis=1)
    # Per-class gradients of the logits w.r.t. the flattened adversarial
    # input; one tf.gradients call per class.
    grads = [
        tf.gradients(logits[:, i], self.xs_adv)[0]
        for i in range(self.model.n_class)
    ]
    grads = tf.stack(grads, axis=0)
    # Reorder to (batch, class, x_dim) so gather_nd can pick per-example rows.
    grads = tf.transpose(grads, (1, 0, 2))
    # Logit and gradient of each example's original class, kept 2-D / 3-D
    # for broadcasting against all classes.
    yk0s = tf.expand_dims(tf.gather_nd(logits, k0s), axis=1)
    gradk0s = tf.expand_dims(tf.gather_nd(grads, k0s), axis=1)
    # DeepFool linearization: f_k = |logit_k0 - logit_k|,
    # w_k = grad_k - grad_k0 for every class k.
    self.fs = tf.abs(yk0s - logits)
    self.ws = grads - gradk0s
    # Filled in later by configuration (iteration count, distance metric).
    self.iteration = None
    self.distance_metric = None
def __init__(self, model, batch_size, goal, distance_metric, decay_factor):
    """Build the momentum iterative (MI-FGSM style) attack graph.

    :param model: differentiable target classifier.
    :param batch_size: number of examples attacked at once.
    :param goal: 't' / 'tm' for targeted goals, 'ut' for untargeted.
    :param distance_metric: 'l_2' or 'l_inf'.
    :param decay_factor: momentum decay for the accumulated gradient.
    :raises NotImplementedError: for an unknown goal or distance metric.
    """
    assert isinstance(model, ClassifierDifferentiable)
    Attack.__init__(self, model=model, batch_size=batch_size)
    xs_shape = (self.batch_size, np.prod(self.model.x_shape))
    ys_shape = (self.batch_size, )
    model_xs_shape = (self.batch_size, *self.model.x_shape)
    xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)
    self.xs_ph = get_xs_ph(self.model, self.batch_size)
    self.ys_ph = get_ys_ph(self.model, self.batch_size)
    # BUG FIX: eps_ph and alpha_ph were created as tf.Variable(tf.zeros(...))
    # although they are named placeholders and consumed by config_setup's
    # assigns below — as zero-initialized Variables, the caller could never
    # feed in the eps/alpha magnitudes. They must be placeholders.
    self.eps_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
    self.alpha_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
    self.xs_var = tf.Variable(xs_zeros)
    self.ys_var = tf.Variable(tf.zeros(ys_shape, dtype=self.model.y_dtype))
    # Accumulated (momentum) gradient.
    self.g_var = tf.Variable(xs_zeros)
    # dtype made explicit to match the placeholders above (previously the
    # implicit float32 default).
    self.eps_var = tf.Variable(
        tf.zeros((batch_size, ), dtype=self.model.x_dtype))
    self.alpha_var = tf.Variable(
        tf.zeros((batch_size, ), dtype=self.model.x_dtype))
    self.xs_adv_var = tf.Variable(xs_zeros)
    self.xs_adv = tf.reshape(self.xs_adv_var, model_xs_shape)
    logits, _ = self.model.logits_and_labels(self.xs_adv)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.ys_var, logits=logits)
    self.grad = tf.gradients(loss, self.xs_adv_var)[0]
    # Per-example L1 norm used to normalize each momentum update.
    self.grad_l1 = tf.reduce_sum(tf.abs(self.grad), axis=1)
    self.config_setup = [
        self.eps_var.assign(self.eps_ph),
        self.alpha_var.assign(self.alpha_ph),
    ]
    xs_ph_in = tf.reshape(self.xs_ph, xs_shape)
    self.setup = [
        self.xs_var.assign(xs_ph_in),
        self.ys_var.assign(self.ys_ph),
        self.xs_adv_var.assign(xs_ph_in),
        tf.variables_initializer([self.g_var]),
    ]
    eps = tf.expand_dims(self.eps_var, 1)
    alpha = tf.expand_dims(self.alpha_var, 1)
    # Momentum accumulation: decay the running gradient and add the
    # L1-normalized fresh gradient.
    g_next = decay_factor * self.g_var + \
        self.grad / tf.expand_dims(self.grad_l1, 1)
    self.update_g_step = self.g_var.assign(g_next)
    # Targeted goals descend the cross-entropy loss; untargeted ascends it.
    if goal == 't' or goal == 'tm':
        g = -self.g_var
    elif goal == 'ut':
        g = self.g_var
    else:
        raise NotImplementedError
    if distance_metric == 'l_2':
        g_norm = tf.maximum(1e-12, tf.norm(g, axis=1))
        g_unit = g / tf.expand_dims(g_norm, 1)
        # Step along the unit gradient, then project back into the
        # per-example L2 ball of radius eps around the original input.
        d = self.xs_adv_var + alpha * g_unit - self.xs_var
        xs_next = self.xs_var + tf.clip_by_norm(d, eps, axes=[1])
    elif distance_metric == 'l_inf':
        # Project into the per-example L-inf box [xs - eps, xs + eps].
        lo, hi = self.xs_var - eps, self.xs_var + eps
        d = self.xs_adv_var + alpha * tf.sign(g) - self.xs_var
        xs_next = tf.clip_by_value(self.xs_var + d, lo, hi)
    else:
        raise NotImplementedError
    # Keep the adversarial example inside the model's valid input range.
    xs_next = tf.clip_by_value(xs_next, self.model.x_min, self.model.x_max)
    self.step = self.xs_adv_var.assign(xs_next)
    self.goal = goal
def __init__(self, model, batch_size, goal, distance_metric, learning_rate,
             confidence):
    """Build a C&W-style attack graph (tanh change of variables + Adam).

    :param model: differentiable target classifier.
    :param batch_size: number of examples attacked at once.
    :param goal: 't' / 'tm' for targeted goals, 'ut' for untargeted.
    :param distance_metric: 'l_2' or 'l_inf'.
    :param learning_rate: Adam learning rate for the perturbation variable.
    :param confidence: margin added to the hinge score.
    :raises NotImplementedError: for an unknown goal or distance metric.
    """
    assert isinstance(model, ClassifierDifferentiable)
    Attack.__init__(self, model, batch_size)
    self.confidence = confidence

    # Affine map taking [src_lo, src_hi] onto [dst_lo, dst_hi].
    def scale(vec, dst_lo, dst_hi, src_lo, src_hi):
        k = (dst_hi - dst_lo) / (src_hi - src_lo)
        b = dst_lo - k * src_lo
        return k * vec + b

    # tanh-space (-1, 1) -> model input range.
    def scale_to_model(vec):
        return scale(vec, self.model.x_min, self.model.x_max, -1.0, 1.0)

    # Model input range -> open interval just inside (-1, 1), so that
    # atanh below stays finite at the range boundaries.
    def scale_to_tanh(vec):
        return scale(vec, 1e-6 - 1, 1 - 1e-6, self.model.x_min,
                     self.model.x_max)

    model_xs_shape = (self.batch_size, *self.model.x_shape)
    xs_shape = (self.batch_size, np.prod(self.model.x_shape))
    xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)
    self.xs_ph = get_xs_ph(self.model, self.batch_size)
    self.ys_ph = get_ys_ph(self.model, self.batch_size)
    # Per-example trade-off constants c (fed by the search loop).
    self.cs_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
    xs_var = tf.Variable(xs_zeros)
    ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
    cs_var = tf.Variable(tf.zeros_like(self.cs_ph))
    # The optimized perturbation, expressed in tanh space.
    d_ws = tf.Variable(xs_zeros)
    # Change of variables: xs_adv = scale(tanh(atanh(scale(xs)) + d_ws)),
    # which keeps xs_adv inside the model's valid input range by design.
    ws = tf.atanh(scale_to_tanh(xs_var)) + d_ws
    self.xs_adv = scale_to_model(tf.tanh(ws))
    self.xs_adv_output = tf.reshape(self.xs_adv, model_xs_shape)
    logits, _ = self.model.logits_and_labels(self.xs_adv_output)
    ys_one_hot = tf.one_hot(ys_var, self.model.n_class)
    logit_target = tf.reduce_sum(ys_one_hot * logits, 1)
    # Best logit among the non-label classes: mask the label class with a
    # huge negative offset before taking the max.
    logit_other = (1 - ys_one_hot) * logits
    logit_other = logit_other - 0.5 * self.model.x_dtype.max * ys_one_hot
    logit_other = tf.reduce_max(logit_other, 1)
    self.setup_xs = xs_var.assign(tf.reshape(self.xs_ph, xs_shape))
    self.setup_ys = ys_var.assign(self.ys_ph)
    self.setup_cs = cs_var.assign(self.cs_ph)
    self.setup_d_ws = d_ws.assign(tf.zeros_like(d_ws))
    if distance_metric == 'l_2':
        # NOTE(review): this is the squared L2 distance (no sqrt), matching
        # the standard C&W formulation.
        dists = tf.reduce_sum(tf.square(self.xs_adv - xs_var), axis=1)
    elif distance_metric == 'l_inf':
        dists = tf.reduce_max(tf.abs(self.xs_adv - xs_var), axis=1)
    else:
        raise NotImplementedError
    # Hinge on the logit margin; zero once the goal is met by `confidence`.
    if goal == 't' or goal == 'tm':
        score = tf.maximum(0.0, logit_other - logit_target + confidence)
    elif goal == 'ut':
        score = tf.maximum(0.0, logit_target - logit_other + confidence)
    else:
        raise NotImplementedError
    self.goal = goal
    # Total loss: distance + c * attack score, minimized over d_ws only.
    loss = dists + cs_var * score
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.optimizer_step = optimizer.minimize(loss, var_list=[d_ws])
    # Re-initializes Adam's slot variables between runs.
    self.setup_optimizer = tf.variables_initializer(optimizer.variables())
    self.score = score
    self.logits = logits
    self.dists = dists
def __init__(self, model, batch_size, goal, distance_metric,
             random_start=False):
    """Build the basic iterative (BIM/PGD style) attack graph.

    :param model: differentiable target classifier.
    :param batch_size: number of examples attacked at once.
    :param goal: 't' / 'tm' for targeted goals, 'ut' for untargeted.
    :param distance_metric: 'l_2' or 'l_inf'.
    :param random_start: accepted but not yet implemented (TODO); the
        attack always starts from the original input.
    :raises NotImplementedError: for an unknown goal or distance metric.
    """
    # TODO random_start
    assert isinstance(model, ClassifierDifferentiable)
    Attack.__init__(self, model=model, batch_size=batch_size)
    xs_shape = (self.batch_size, np.prod(self.model.x_shape))
    ys_shape = (self.batch_size, )
    model_xs_shape = (self.batch_size, *self.model.x_shape)
    xs_zeros = tf.zeros(xs_shape, dtype=self.model.x_dtype)
    self.xs_ph = get_xs_ph(self.model, self.batch_size)
    self.ys_ph = get_ys_ph(self.model, self.batch_size)
    # BUG FIX: eps_ph and alpha_ph were created as tf.Variable(tf.zeros(...))
    # although they are named placeholders and consumed by config_setup's
    # assigns below — as zero-initialized Variables, the caller could never
    # feed in the eps/alpha magnitudes. They must be placeholders.
    self.eps_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
    self.alpha_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
    self.xs_var = tf.Variable(xs_zeros)
    self.ys_var = tf.Variable(tf.zeros(ys_shape, dtype=self.model.y_dtype))
    # dtype made explicit to match the placeholders above (previously the
    # implicit float32 default).
    self.eps_var = tf.Variable(
        tf.zeros((batch_size, ), dtype=self.model.x_dtype))
    self.alpha_var = tf.Variable(
        tf.zeros((batch_size, ), dtype=self.model.x_dtype))
    self.xs_adv_var = tf.Variable(xs_zeros)
    self.xs_adv = tf.reshape(self.xs_adv_var, model_xs_shape)
    logits, _ = self.model.logits_and_labels(self.xs_adv)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.ys_var, logits=logits)
    self.grad = tf.gradients(loss, self.xs_adv_var)[0]
    self.config_setup = [
        self.eps_var.assign(self.eps_ph),
        self.alpha_var.assign(self.alpha_ph),
    ]
    xs_ph_in = tf.reshape(self.xs_ph, xs_shape)
    self.setup = [
        self.xs_var.assign(xs_ph_in),
        self.ys_var.assign(self.ys_ph),
        self.xs_adv_var.assign(xs_ph_in),
    ]
    eps = tf.expand_dims(self.eps_var, 1)
    alpha = tf.expand_dims(self.alpha_var, 1)
    # Targeted goals descend the cross-entropy loss; untargeted ascends it.
    if goal == 't' or goal == 'tm':
        grad = -self.grad
    elif goal == 'ut':
        grad = self.grad
    else:
        raise NotImplementedError
    if distance_metric == 'l_2':
        grad_norm = tf.maximum(1e-12, tf.norm(grad, axis=1))
        grad_unit = grad / tf.expand_dims(grad_norm, 1)
        # Step along the unit gradient, then project back into the
        # per-example L2 ball of radius eps around the original input.
        xs_next = self.xs_adv_var - self.xs_var + alpha * grad_unit
        xs_next = self.xs_var + tf.clip_by_norm(xs_next, eps, axes=[1])
    elif distance_metric == 'l_inf':
        # Project into the per-example L-inf box [xs - eps, xs + eps].
        lo, hi = self.xs_var - eps, self.xs_var + eps
        xs_next = self.xs_adv_var + alpha * tf.sign(grad)
        xs_next = tf.clip_by_value(xs_next, lo, hi)
    else:
        raise NotImplementedError
    # Keep the adversarial example inside the model's valid input range.
    xs_next = tf.clip_by_value(xs_next, self.model.x_min, self.model.x_max)
    self.step = self.xs_adv_var.assign(xs_next)
    self.goal = goal